1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
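/* An illustrative use of the overloads above (values are hypothetical, not
   from a real target): a vectorizable_* analysis routine costing NCOPIES
   copies of a vector operation plus the broadcast of one invariant operand
   could do

     prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					 stmt_info, vectype, 0, vect_prologue);
     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				      stmt_info, vectype, 0, vect_body);

   The return values are only preliminary estimates from
   builtin_vectorization_cost; the final numbers come from the target cost
   model once the recorded stmt_info_for_cost entries are processed.  */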
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
179 return vect_name;
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
191 tree array_ref;
192 gimple *new_stmt;
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
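/* Sketch of how the helpers above are combined for load/store-lanes
   (identifiers are illustrative): for an IFN_STORE_LANES call the per-vector
   operands are first written into an array of vectors which is then passed
   to the internal function:

     tree array = create_vector_array (vectype, group_size);
     for (unsigned i = 0; i < group_size; ++i)
       write_vector_array (vinfo, stmt_info, gsi, vec_oprnds[i], array, i);

   read_vector_array performs the inverse operation, extracting the
   individual vectors that an IFN_LOAD_LANES call stored into such an
   array.  */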
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
209 tree mem_ref;
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt_info);
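/* For example, if
     S: tmp = (int) s0 * (int) s1;
   was recognized as a widening-multiply pattern, then marking S relevant
   above actually marks the pattern statement recorded in
   STMT_VINFO_RELATED_STMT, since that statement is the one that will be
   vectorized in place of S.  */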
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
294 tree op;
295 ssa_op_iter iter;
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
303 enum vect_def_type dt = vect_uninitialized_def;
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
316 return true;
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
 376 	     (because of loop-closed SSA form).  */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
380 *live_p = true;
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
394 return (*live_p || *relevant);
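/* Illustrative example: in

     for (i = 0; i < n; i++)
       sum_1 = sum_0 + a[i];
     ... = use (sum_1);

   the addition has a use outside the loop (through the loop-closed PHI in
   the exit block), so *LIVE_P is set; a store  a[i] = x  is instead marked
   relevant because it has a vdef.  */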
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
406 tree operand;
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
 414 	/* STMT has a data_ref.  FORNOW this means that it's of one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
446 return false;
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
455 if (operand == use)
456 return true;
458 return false;
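/* Illustrative example: for the store  a[i_1] = x_2  the use of i_1 only
   feeds the address computation, so this function returns false for i_1,
   while it returns true for x_2.  For a masked internal call such as
   .MASK_STORE (ptr, align, mask_3, x_2) the mask and the stored value are
   the non-indexing uses handled above.  */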
 463 	/* Function process_use.
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 483 	   we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
507 if (!dstmt_vinfo)
508 return opt_result::success ();
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
597 default:
598 gcc_unreachable ();
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
633 3. j = j + 1
 635 	   Stmts 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec<stmt_vec_info, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
687 use_operand_p use_p;
688 ssa_op_iter iter;
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
767 for (; i < gimple_num_ops (assign); i++)
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
782 for (i = 0; i < gimple_call_num_args (call); i++)
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
814 if (fatal)
815 *fatal = false;
816 return res;
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
839 int inside_cost = 0, prologue_cost = 0;
841 gcc_assert (cost_vec != NULL);
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
847 if (!node)
 848 	    /* Cost the "broadcast" of a scalar operand into a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
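/* Worked example (hypothetical cost of 1 per statement): with NCOPIES == 2,
   NDTS == 2 and dt[0] a constant while dt[1] is an internal def, the code
   above records one scalar_to_vec in the prologue and two vector_stmt
   copies in the body, giving inside_cost = 2 and prologue_cost = 1 in the
   dump.  */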
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
885 for (i = 0; i < pwr + 1; i++)
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
 894 	  /* FORNOW: Assuming maximum 2 args per stmt.  */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
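/* Worked example: a two-step promotion (PWR == 1) that produces NCOPIES == 1
   vector result at the narrowest end records 1 + 2 = 3 operations in the
   body, since each additional step doubles the instruction count.  */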
906 /* Returns true if the current function returns DECL. */
908 static bool
909 cfun_returns (tree decl)
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
915 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
934 return false;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 gather_scatter_info *gs_info,
946 dr_alignment_support alignment_support_scheme,
947 int misalignment,
948 vec_load_store_type vls_type, slp_tree slp_node,
949 stmt_vector_for_cost *cost_vec)
951 unsigned int inside_cost = 0, prologue_cost = 0;
952 stmt_vec_info first_stmt_info = stmt_info;
953 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
955 /* ??? Somehow we need to fix this at the callers. */
956 if (slp_node)
957 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
959 if (vls_type == VLS_STORE_INVARIANT)
961 if (!slp_node)
962 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
963 stmt_info, 0, vect_prologue);
966 /* Grouped stores update all elements in the group at once,
967 so we want the DR for the first statement. */
968 if (!slp_node && grouped_access_p)
969 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
971 /* True if we should include any once-per-group costs as well as
972 the cost of the statement itself. For SLP we only get called
973 once per group anyhow. */
974 bool first_stmt_p = (first_stmt_info == stmt_info);
976 /* We assume that the cost of a single store-lanes instruction is
977 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
978 access is instead being provided by a permute-and-store operation,
979 include the cost of the permutes. */
980 if (first_stmt_p
981 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 983 	      /* Uses high and low interleave or shuffle operations for each
984 needed permute. */
985 int group_size = DR_GROUP_SIZE (first_stmt_info);
986 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
987 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
988 stmt_info, 0, vect_body);
990 if (dump_enabled_p ())
991 dump_printf_loc (MSG_NOTE, vect_location,
992 "vect_model_store_cost: strided group_size = %d .\n",
993 group_size);
996 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
997 /* Costs of the stores. */
998 if (memory_access_type == VMAT_ELEMENTWISE
999 || memory_access_type == VMAT_GATHER_SCATTER)
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 if (memory_access_type == VMAT_GATHER_SCATTER
1003 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1004 /* For emulated scatter N offset vector element extracts
1005 (we assume the scalar scaling and ptr + offset add is consumed by
1006 	    the store).  */
1007 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1008 vec_to_scalar, stmt_info, 0,
1009 vect_body);
1010 /* N scalar stores plus extracting the elements. */
1011 inside_cost += record_stmt_cost (cost_vec,
1012 ncopies * assumed_nunits,
1013 scalar_store, stmt_info, 0, vect_body);
1015 else
1016 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1017 misalignment, &inside_cost, cost_vec);
1019 if (memory_access_type == VMAT_ELEMENTWISE
1020 || memory_access_type == VMAT_STRIDED_SLP
1021 || (memory_access_type == VMAT_GATHER_SCATTER
1022 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1024 /* N scalar stores plus extracting the elements. */
1025 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1026 inside_cost += record_stmt_cost (cost_vec,
1027 ncopies * assumed_nunits,
1028 vec_to_scalar, stmt_info, 0, vect_body);
1031 /* When vectorizing a store into the function result assign
1032 a penalty if the function returns in a multi-register location.
1033 In this case we assume we'll end up with having to spill the
1034 vector result and do piecewise loads as a conservative estimate. */
1035 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1036 if (base
1037 && (TREE_CODE (base) == RESULT_DECL
1038 || (DECL_P (base) && cfun_returns (base)))
1039 && !aggregate_value_p (base, cfun->decl))
1041 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1042 /* ??? Handle PARALLEL in some way. */
1043 if (REG_P (reg))
1045 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1046 /* Assume that a single reg-reg move is possible and cheap,
1047 do not account for vector to gp register move cost. */
1048 if (nregs > 1)
1050 /* Spill. */
1051 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1052 vector_store,
1053 stmt_info, 0, vect_epilogue);
1054 /* Loads. */
1055 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1056 scalar_load,
1057 stmt_info, 0, vect_epilogue);
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_NOTE, vect_location,
1064 "vect_model_store_cost: inside_cost = %d, "
1065 "prologue_cost = %d .\n", inside_cost, prologue_cost);
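/* Worked example for the permute case above (illustrative numbers): a
   grouped store with DR_GROUP_SIZE == 4 and NCOPIES == 2 using
   VMAT_CONTIGUOUS_PERMUTE records nstmts = 2 * ceil_log2 (4) * 4 = 16
   vec_perm operations in addition to the two vector stores themselves.  */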
1069 /* Calculate cost of DR's memory access. */
1070 void
1071 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1072 dr_alignment_support alignment_support_scheme,
1073 int misalignment,
1074 unsigned int *inside_cost,
1075 stmt_vector_for_cost *body_cost_vec)
1077 switch (alignment_support_scheme)
1079 case dr_aligned:
1081 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1082 vector_store, stmt_info, 0,
1083 vect_body);
1085 if (dump_enabled_p ())
1086 dump_printf_loc (MSG_NOTE, vect_location,
1087 "vect_model_store_cost: aligned.\n");
1088 break;
1091 case dr_unaligned_supported:
1093 /* Here, we assign an additional cost for the unaligned store. */
1094 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1095 unaligned_store, stmt_info,
1096 misalignment, vect_body);
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_NOTE, vect_location,
1099 "vect_model_store_cost: unaligned supported by "
1100 "hardware.\n");
1101 break;
1104 case dr_unaligned_unsupported:
1106 *inside_cost = VECT_MAX_COST;
1108 if (dump_enabled_p ())
1109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1110 "vect_model_store_cost: unsupported access.\n");
1111 break;
1114 default:
1115 gcc_unreachable ();
1120 /* Function vect_model_load_cost
1122 Models cost for loads. In the case of grouped accesses, one access has
1123 the overhead of the grouped access attributed to it. Since unaligned
1124 accesses are supported for loads, we also account for the costs of the
1125 access scheme chosen. */
1127 static void
1128 vect_model_load_cost (vec_info *vinfo,
1129 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1130 vect_memory_access_type memory_access_type,
1131 dr_alignment_support alignment_support_scheme,
1132 int misalignment,
1133 gather_scatter_info *gs_info,
1134 slp_tree slp_node,
1135 stmt_vector_for_cost *cost_vec)
1137 unsigned int inside_cost = 0, prologue_cost = 0;
1138 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1140 gcc_assert (cost_vec);
1142 /* ??? Somehow we need to fix this at the callers. */
1143 if (slp_node)
1144 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1146 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1148 /* If the load is permuted then the alignment is determined by
1149 the first group element not by the first scalar stmt DR. */
1150 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1151 /* Record the cost for the permutation. */
1152 unsigned n_perms, n_loads;
1153 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1154 vf, true, &n_perms, &n_loads);
1155 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1156 first_stmt_info, 0, vect_body);
1158 /* And adjust the number of loads performed. This handles
1159 redundancies as well as loads that are later dead. */
1160 ncopies = n_loads;
1163 /* Grouped loads read all elements in the group at once,
1164 so we want the DR for the first statement. */
1165 stmt_vec_info first_stmt_info = stmt_info;
1166 if (!slp_node && grouped_access_p)
1167 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1169 /* True if we should include any once-per-group costs as well as
1170 the cost of the statement itself. For SLP we only get called
1171 once per group anyhow. */
1172 bool first_stmt_p = (first_stmt_info == stmt_info);
1174 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1175 ones we actually need. Account for the cost of unused results. */
1176 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1178 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1179 stmt_vec_info next_stmt_info = first_stmt_info;
1182 gaps -= 1;
1183 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1185 while (next_stmt_info);
1186 if (gaps)
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: %d unused vectors.\n",
1191 gaps);
1192 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1193 alignment_support_scheme, misalignment, false,
1194 &inside_cost, &prologue_cost,
1195 cost_vec, cost_vec, true);
1199 /* We assume that the cost of a single load-lanes instruction is
1200 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1201 access is instead being provided by a load-and-permute operation,
1202 include the cost of the permutes. */
1203 if (first_stmt_p
1204 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1206 	      /* Uses even and odd extract operations or shuffle operations
1207 for each needed permute. */
1208 int group_size = DR_GROUP_SIZE (first_stmt_info);
1209 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1210 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: strided group_size = %d .\n",
1216 group_size);
1219 /* The loads themselves. */
1220 if (memory_access_type == VMAT_ELEMENTWISE
1221 || memory_access_type == VMAT_GATHER_SCATTER)
1223 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1224 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1225 if (memory_access_type == VMAT_GATHER_SCATTER
1226 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1227 /* For emulated gathers N offset vector element extracts
1228 (we assume the scalar scaling and ptr + offset add is consumed by
1229 the load). */
1230 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1231 vec_to_scalar, stmt_info, 0,
1232 vect_body);
1233 /* N scalar loads plus gathering them into a vector. */
1234 inside_cost += record_stmt_cost (cost_vec,
1235 ncopies * assumed_nunits,
1236 scalar_load, stmt_info, 0, vect_body);
1238 else if (memory_access_type == VMAT_INVARIANT)
1240 /* Invariant loads will ideally be hoisted and splat to a vector. */
1241 prologue_cost += record_stmt_cost (cost_vec, 1,
1242 scalar_load, stmt_info, 0,
1243 vect_prologue);
1244 prologue_cost += record_stmt_cost (cost_vec, 1,
1245 scalar_to_vec, stmt_info, 0,
1246 vect_prologue);
1248 else
1249 vect_get_load_cost (vinfo, stmt_info, ncopies,
1250 alignment_support_scheme, misalignment, first_stmt_p,
1251 &inside_cost, &prologue_cost,
1252 cost_vec, cost_vec, true);
1253 if (memory_access_type == VMAT_ELEMENTWISE
1254 || memory_access_type == VMAT_STRIDED_SLP
1255 || (memory_access_type == VMAT_GATHER_SCATTER
1256 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1257 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1258 stmt_info, 0, vect_body);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE, vect_location,
1262 "vect_model_load_cost: inside_cost = %d, "
1263 "prologue_cost = %d .\n", inside_cost, prologue_cost);
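/* Worked example for the emulated-gather path above (illustrative numbers):
   with a four-element vectype (ASSUMED_NUNITS == 4) and NCOPIES == 2, an
   emulated gather records 8 vec_to_scalar extracts for the offsets,
   8 scalar_load operations and 2 vec_construct operations to assemble the
   vector results.  */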
1267 /* Calculate cost of DR's memory access. */
1268 void
1269 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1270 dr_alignment_support alignment_support_scheme,
1271 int misalignment,
1272 bool add_realign_cost, unsigned int *inside_cost,
1273 unsigned int *prologue_cost,
1274 stmt_vector_for_cost *prologue_cost_vec,
1275 stmt_vector_for_cost *body_cost_vec,
1276 bool record_prologue_costs)
1278 switch (alignment_support_scheme)
1280 case dr_aligned:
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1283 stmt_info, 0, vect_body);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: aligned.\n");
1289 break;
1291 case dr_unaligned_supported:
1293 /* Here, we assign an additional cost for the unaligned load. */
1294 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1295 unaligned_load, stmt_info,
1296 misalignment, vect_body);
1298 if (dump_enabled_p ())
1299 dump_printf_loc (MSG_NOTE, vect_location,
1300 "vect_model_load_cost: unaligned supported by "
1301 "hardware.\n");
1303 break;
1305 case dr_explicit_realign:
1307 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1308 vector_load, stmt_info, 0, vect_body);
1309 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1310 vec_perm, stmt_info, 0, vect_body);
1312 /* FIXME: If the misalignment remains fixed across the iterations of
1313 the containing loop, the following cost should be added to the
1314 prologue costs. */
1315 if (targetm.vectorize.builtin_mask_for_load)
1316 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1317 stmt_info, 0, vect_body);
1319 if (dump_enabled_p ())
1320 dump_printf_loc (MSG_NOTE, vect_location,
1321 "vect_model_load_cost: explicit realign\n");
1323 break;
1325 case dr_explicit_realign_optimized:
1327 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE, vect_location,
1329 "vect_model_load_cost: unaligned software "
1330 "pipelined.\n");
1332 /* Unaligned software pipeline has a load of an address, an initial
1333 load, and possibly a mask operation to "prime" the loop. However,
1334 if this is an access in a group of loads, which provide grouped
1335 access, then the above cost should only be considered for one
1336 access in the group. Inside the loop, there is a load op
1337 and a realignment op. */
1339 if (add_realign_cost && record_prologue_costs)
1341 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1342 vector_stmt, stmt_info,
1343 0, vect_prologue);
1344 if (targetm.vectorize.builtin_mask_for_load)
1345 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1346 vector_stmt, stmt_info,
1347 0, vect_prologue);
1350 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1351 stmt_info, 0, vect_body);
1352 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1353 stmt_info, 0, vect_body);
1355 if (dump_enabled_p ())
1356 dump_printf_loc (MSG_NOTE, vect_location,
1357 "vect_model_load_cost: explicit realign optimized"
1358 "\n");
1360 break;
1363 case dr_unaligned_unsupported:
1365 *inside_cost = VECT_MAX_COST;
1367 if (dump_enabled_p ())
1368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1369 "vect_model_load_cost: unsupported access.\n");
1370 break;
1373 default:
1374 gcc_unreachable ();
1378 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1379 the loop preheader for the vectorized stmt STMT_VINFO. */
1381 static void
1382 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1383 gimple_stmt_iterator *gsi)
1385 if (gsi)
1386 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1387 else
1388 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_NOTE, vect_location,
1392 "created new init_stmt: %G", new_stmt);
1395 /* Function vect_init_vector.
1397 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1398 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1399 vector type a vector with all elements equal to VAL is created first.
1400 Place the initialization at GSI if it is not NULL. Otherwise, place the
1401 initialization at the loop preheader.
1402 Return the DEF of INIT_STMT.
1403 It will be used in the vectorization of STMT_INFO. */
1405 tree
1406 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1407 gimple_stmt_iterator *gsi)
1409 gimple *init_stmt;
1410 tree new_temp;
1412 	  /* We abuse this function to push something to an SSA name with initial 'val'.  */
1413 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1415 gcc_assert (VECTOR_TYPE_P (type));
1416 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1418 /* Scalar boolean value should be transformed into
1419 all zeros or all ones value before building a vector. */
1420 if (VECTOR_BOOLEAN_TYPE_P (type))
1422 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1423 tree false_val = build_zero_cst (TREE_TYPE (type));
1425 if (CONSTANT_CLASS_P (val))
1426 val = integer_zerop (val) ? false_val : true_val;
1427 else
1429 new_temp = make_ssa_name (TREE_TYPE (type));
1430 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1431 val, true_val, false_val);
1432 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1433 val = new_temp;
1436 else
1438 gimple_seq stmts = NULL;
1439 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1440 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1441 TREE_TYPE (type), val);
1442 else
1443 /* ??? Condition vectorization expects us to do
1444 promotion of invariant/external defs. */
1445 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1446 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1447 !gsi_end_p (gsi2); )
1449 init_stmt = gsi_stmt (gsi2);
1450 gsi_remove (&gsi2, false);
1451 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1455 val = build_vector_from_val (type, val);
1458 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1459 init_stmt = gimple_build_assign (new_temp, val);
1460 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1461 return new_temp;
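/* Illustrative use (V4SI_TYPE stands for whatever vector type the caller
   has determined):

     tree vop = vect_init_vector (vinfo, stmt_info,
				  build_int_cst (integer_type_node, 3),
				  v4si_type, NULL);

   emits  cst_N = { 3, 3, 3, 3 }  in the loop preheader (GSI == NULL) and
   returns the new SSA name to be used as an invariant vector operand.  */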
1465 /* Function vect_get_vec_defs_for_operand.
1467 OP is an operand in STMT_VINFO. This function returns a vector of
1468 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1470 In the case that OP is an SSA_NAME which is defined in the loop, then
1471 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1473 In case OP is an invariant or constant, a new stmt that creates a vector def
1474 needs to be introduced. VECTYPE may be used to specify a required type for
1475 vector invariant. */
1477 void
1478 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1479 unsigned ncopies,
1480 tree op, vec<tree> *vec_oprnds, tree vectype)
1482 gimple *def_stmt;
1483 enum vect_def_type dt;
1484 bool is_simple_use;
1485 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1487 if (dump_enabled_p ())
1488 dump_printf_loc (MSG_NOTE, vect_location,
1489 "vect_get_vec_defs_for_operand: %T\n", op);
1491 stmt_vec_info def_stmt_info;
1492 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1493 &def_stmt_info, &def_stmt);
1494 gcc_assert (is_simple_use);
1495 if (def_stmt && dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1498 vec_oprnds->create (ncopies);
1499 if (dt == vect_constant_def || dt == vect_external_def)
1501 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1502 tree vector_type;
1504 if (vectype)
1505 vector_type = vectype;
1506 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1507 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1508 vector_type = truth_type_for (stmt_vectype);
1509 else
1510 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1512 gcc_assert (vector_type);
1513 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1514 while (ncopies--)
1515 vec_oprnds->quick_push (vop);
1517 else
1519 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1520 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1521 for (unsigned i = 0; i < ncopies; ++i)
1522 vec_oprnds->quick_push (gimple_get_lhs
1523 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1528 /* Get vectorized definitions for OP0 and OP1. */
1530 void
1531 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1532 unsigned ncopies,
1533 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1534 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1535 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1536 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1538 if (slp_node)
1540 if (op0)
1541 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1542 if (op1)
1543 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1544 if (op2)
1545 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1546 if (op3)
1547 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1549 else
1551 if (op0)
1552 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1553 op0, vec_oprnds0, vectype0);
1554 if (op1)
1555 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1556 op1, vec_oprnds1, vectype1);
1557 if (op2)
1558 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1559 op2, vec_oprnds2, vectype2);
1560 if (op3)
1561 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1562 op3, vec_oprnds3, vectype3);
1566 void
1567 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1568 unsigned ncopies,
1569 tree op0, vec<tree> *vec_oprnds0,
1570 tree op1, vec<tree> *vec_oprnds1,
1571 tree op2, vec<tree> *vec_oprnds2,
1572 tree op3, vec<tree> *vec_oprnds3)
1574 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1575 op0, vec_oprnds0, NULL_TREE,
1576 op1, vec_oprnds1, NULL_TREE,
1577 op2, vec_oprnds2, NULL_TREE,
1578 op3, vec_oprnds3, NULL_TREE);
1581 /* Helper function called by vect_finish_replace_stmt and
1582 vect_finish_stmt_generation. Set the location of the new
1583 statement and create and return a stmt_vec_info for it. */
1585 static void
1586 vect_finish_stmt_generation_1 (vec_info *,
1587 stmt_vec_info stmt_info, gimple *vec_stmt)
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1592 if (stmt_info)
1594 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1596 /* While EH edges will generally prevent vectorization, stmt might
1597 e.g. be in a must-not-throw region. Ensure newly created stmts
1598 that could throw are part of the same region. */
1599 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1600 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1601 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1603 else
1604 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1607 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1608 which sets the same scalar result as STMT_INFO did. Create and return a
1609 stmt_vec_info for VEC_STMT. */
1611 void
1612 vect_finish_replace_stmt (vec_info *vinfo,
1613 stmt_vec_info stmt_info, gimple *vec_stmt)
1615 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1616 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1618 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1619 gsi_replace (&gsi, vec_stmt, true);
1621 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1624 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1625 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1627 void
1628 vect_finish_stmt_generation (vec_info *vinfo,
1629 stmt_vec_info stmt_info, gimple *vec_stmt,
1630 gimple_stmt_iterator *gsi)
1632 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1634 if (!gsi_end_p (*gsi)
1635 && gimple_has_mem_ops (vec_stmt))
1637 gimple *at_stmt = gsi_stmt (*gsi);
1638 tree vuse = gimple_vuse (at_stmt);
1639 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1641 tree vdef = gimple_vdef (at_stmt);
1642 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643 gimple_set_modified (vec_stmt, true);
1644 /* If we have an SSA vuse and insert a store, update virtual
1645 SSA form to avoid triggering the renamer. Do so only
1646 if we can easily see all uses - which is what almost always
1647 happens with the way vectorized stmts are inserted. */
1648 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1649 && ((is_gimple_assign (vec_stmt)
1650 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1651 || (is_gimple_call (vec_stmt)
1652 && (!(gimple_call_flags (vec_stmt)
1653 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1654 || (gimple_call_lhs (vec_stmt)
1655 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1657 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1658 gimple_set_vdef (vec_stmt, new_vdef);
1659 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1663 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1664 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1667 /* We want to vectorize a call to combined function CFN with function
1668 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1669 as the types of all inputs. Check whether this is possible using
1670 an internal function, returning its code if so or IFN_LAST if not. */
1672 static internal_fn
1673 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1674 tree vectype_out, tree vectype_in)
1676 internal_fn ifn;
1677 if (internal_fn_p (cfn))
1678 ifn = as_internal_fn (cfn);
1679 else
1680 ifn = associated_internal_fn (fndecl);
1681 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1683 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1684 if (info.vectorizable)
1686 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1687 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1688 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1689 OPTIMIZE_FOR_SPEED))
1690 return ifn;
1693 return IFN_LAST;
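/* For example (assuming target support), a scalar call to __builtin_sqrtf
   maps to CFN_BUILT_IN_SQRTF; with VECTYPE_OUT == VECTYPE_IN == V4SF the
   function above returns IFN_SQRT if the target provides the corresponding
   sqrtv4sf2 pattern, and IFN_LAST otherwise.  */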
1697 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1698 gimple_stmt_iterator *);
1700 /* Check whether a load or store statement in the loop described by
1701 LOOP_VINFO is possible in a loop using partial vectors. This is
1702 testing whether the vectorizer pass has the appropriate support,
1703 as well as whether the target does.
1705 VLS_TYPE says whether the statement is a load or store and VECTYPE
1706 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1707 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1708 says how the load or store is going to be implemented and GROUP_SIZE
1709 is the number of load or store statements in the containing group.
1710 If the access is a gather load or scatter store, GS_INFO describes
1711 its arguments. If the load or store is conditional, SCALAR_MASK is the
1712 condition under which it occurs.
1714 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1715 vectors is not supported, otherwise record the required rgroup control
1716 types. */
1718 static void
1719 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1720 slp_tree slp_node,
1721 vec_load_store_type vls_type,
1722 int group_size,
1723 vect_memory_access_type
1724 memory_access_type,
1725 gather_scatter_info *gs_info,
1726 tree scalar_mask)
1728 /* Invariant loads need no special support. */
1729 if (memory_access_type == VMAT_INVARIANT)
1730 return;
1732 unsigned int nvectors;
1733 if (slp_node)
1734 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1735 else
1736 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1738 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1739 machine_mode vecmode = TYPE_MODE (vectype);
1740 bool is_load = (vls_type == VLS_LOAD);
1741 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1743 if (is_load
1744 ? !vect_load_lanes_supported (vectype, group_size, true)
1745 : !vect_store_lanes_supported (vectype, group_size, true))
1747 if (dump_enabled_p ())
1748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1749 "can't operate on partial vectors because"
1750 " the target doesn't have an appropriate"
1751 " load/store-lanes instruction.\n");
1752 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1753 return;
1755 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1756 scalar_mask);
1757 return;
1760 if (memory_access_type == VMAT_GATHER_SCATTER)
1762 internal_fn ifn = (is_load
1763 ? IFN_MASK_GATHER_LOAD
1764 : IFN_MASK_SCATTER_STORE);
1765 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1766 gs_info->memory_type,
1767 gs_info->offset_vectype,
1768 gs_info->scale))
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1772 "can't operate on partial vectors because"
1773 " the target doesn't have an appropriate"
1774 " gather load or scatter store instruction.\n");
1775 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1776 return;
1778 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1779 scalar_mask);
1780 return;
1783 if (memory_access_type != VMAT_CONTIGUOUS
1784 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1786 /* Element X of the data must come from iteration i * VF + X of the
1787 scalar loop. We need more work to support other mappings. */
1788 if (dump_enabled_p ())
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1790 "can't operate on partial vectors because an"
1791 " access isn't contiguous.\n");
1792 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1793 return;
1796 if (!VECTOR_MODE_P (vecmode))
1798 if (dump_enabled_p ())
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1800 "can't operate on partial vectors when emulating"
1801 " vector operations.\n");
1802 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1803 return;
1806 /* We might load more scalars than we need for permuting SLP loads.
1807 We checked in get_group_load_store_type that the extra elements
1808 don't leak into a new vector. */
1809 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1811 unsigned int nvectors;
1812 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1813 return nvectors;
1814 gcc_unreachable ();
1817 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1818 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1819 machine_mode mask_mode;
1820 bool using_partial_vectors_p = false;
1821 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1822 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1824 nvectors = group_memory_nvectors (group_size * vf, nunits);
1825 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1826 using_partial_vectors_p = true;
1829 machine_mode vmode;
1830 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1832 nvectors = group_memory_nvectors (group_size * vf, nunits);
1833 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1834 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1835 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1836 using_partial_vectors_p = true;
1839 if (!using_partial_vectors_p)
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "can't operate on partial vectors because the"
1844 " target doesn't have the appropriate partial"
1845 " vectorization load or store.\n");
1846 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
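/* Illustrative example (assuming a group of two accesses, a vectorization
   factor of 8 and V4SI vectors): group_memory_nvectors (2 * 8, 4) yields 4,
   so four rgroup masks or lengths are recorded for this access, one per
   vector statement.  */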
1850 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1851 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1852 that needs to be applied to all loads and stores in a vectorized loop.
1853 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1854 otherwise return VEC_MASK & LOOP_MASK.
1856 MASK_TYPE is the type of both masks. If new statements are needed,
1857 insert them before GSI. */
1859 static tree
1860 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1861 tree vec_mask, gimple_stmt_iterator *gsi)
1863 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1864 if (!loop_mask)
1865 return vec_mask;
1867 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1869 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1870 return vec_mask;
1872 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1873 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1874 vec_mask, loop_mask);
1876 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1877 return and_res;
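/* Illustrative note (assuming a conditional access such as
   "if (c[i]) a[i] = b[i]"): VEC_MASK is the vectorized c[i] condition and
   LOOP_MASK limits the access to the iterations the partially vectorized
   loop may execute.  If the pair is already recorded in vec_cond_masked_set
   the AND is known to be redundant and VEC_MASK is used directly; otherwise
   a single BIT_AND_EXPR is emitted here.  */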
1880 /* Determine whether we can use a gather load or scatter store to vectorize
1881 strided load or store STMT_INFO by truncating the current offset to a
1882 smaller width. We need to be able to construct an offset vector:
1884 { 0, X, X*2, X*3, ... }
1886 without loss of precision, where X is STMT_INFO's DR_STEP.
1888 Return true if this is possible, describing the gather load or scatter
1889 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1891 static bool
1892 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1893 loop_vec_info loop_vinfo, bool masked_p,
1894 gather_scatter_info *gs_info)
1896 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1897 data_reference *dr = dr_info->dr;
1898 tree step = DR_STEP (dr);
1899 if (TREE_CODE (step) != INTEGER_CST)
1901 /* ??? Perhaps we could use range information here? */
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_NOTE, vect_location,
1904 "cannot truncate variable step.\n");
1905 return false;
1908 /* Get the number of bits in an element. */
1909 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1910 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1911 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1913 /* Set COUNT to the upper limit on the number of elements - 1.
1914 Start with the maximum vectorization factor. */
1915 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1917 /* Try lowering COUNT to the number of scalar latch iterations. */
1918 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1919 widest_int max_iters;
1920 if (max_loop_iterations (loop, &max_iters)
1921 && max_iters < count)
1922 count = max_iters.to_shwi ();
1924 /* Try scales of 1 and the element size. */
1925 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1926 wi::overflow_type overflow = wi::OVF_NONE;
1927 for (int i = 0; i < 2; ++i)
1929 int scale = scales[i];
1930 widest_int factor;
1931 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1932 continue;
1934 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1935 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1936 if (overflow)
1937 continue;
1938 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1939 unsigned int min_offset_bits = wi::min_precision (range, sign);
1941 /* Find the narrowest viable offset type. */
1942 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1943 tree offset_type = build_nonstandard_integer_type (offset_bits,
1944 sign == UNSIGNED);
1946 /* See whether the target supports the operation with an offset
1947 no narrower than OFFSET_TYPE. */
1948 tree memory_type = TREE_TYPE (DR_REF (dr));
1949 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1950 vectype, memory_type, offset_type, scale,
1951 &gs_info->ifn, &gs_info->offset_vectype)
1952 || gs_info->ifn == IFN_LAST)
1953 continue;
1955 gs_info->decl = NULL_TREE;
1956 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1957 but we don't need to store that here. */
1958 gs_info->base = NULL_TREE;
1959 gs_info->element_type = TREE_TYPE (vectype);
1960 gs_info->offset = fold_convert (offset_type, step);
1961 gs_info->offset_dt = vect_constant_def;
1962 gs_info->scale = scale;
1963 gs_info->memory_type = memory_type;
1964 return true;
1967 if (overflow && dump_enabled_p ())
1968 dump_printf_loc (MSG_NOTE, vect_location,
1969 "truncating gather/scatter offset to %d bits"
1970 " might change its value.\n", element_bits);
1972 return false;
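/* Illustrative example (assuming DR_STEP == 48, 4-byte elements and at most
   200 scalar iterations): the candidate scales are 1 and 4.  With SCALE == 4
   the factor is 12, so RANGE is at most 199 * 12 == 2388, which needs 12
   bits; rounding up to a power of two gives a 16-bit unsigned offset type to
   test against the target's gather/scatter support.  */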
1975 /* Return true if we can use gather/scatter internal functions to
1976 vectorize STMT_INFO, which is a grouped or strided load or store.
1977 MASKED_P is true if load or store is conditional. When returning
1978 true, fill in GS_INFO with the information required to perform the
1979 operation. */
1981 static bool
1982 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1983 loop_vec_info loop_vinfo, bool masked_p,
1984 gather_scatter_info *gs_info)
1986 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1987 || gs_info->ifn == IFN_LAST)
1988 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1989 masked_p, gs_info);
1991 tree old_offset_type = TREE_TYPE (gs_info->offset);
1992 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1994 gcc_assert (TYPE_PRECISION (new_offset_type)
1995 >= TYPE_PRECISION (old_offset_type));
1996 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_NOTE, vect_location,
2000 "using gather/scatter for strided/grouped access,"
2001 " scale = %d\n", gs_info->scale);
2003 return true;
2006 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2007 elements with a known constant step. Return -1 if that step
2008 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2010 static int
2011 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2013 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2014 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2015 size_zero_node);
2018 /* If the target supports a permute mask that reverses the elements in
2019 a vector of type VECTYPE, return that mask, otherwise return null. */
2021 static tree
2022 perm_mask_for_reverse (tree vectype)
2024 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2026 /* The encoding has a single stepped pattern. */
2027 vec_perm_builder sel (nunits, 1, 3);
2028 for (int i = 0; i < 3; ++i)
2029 sel.quick_push (nunits - 1 - i);
2031 vec_perm_indices indices (sel, 1, nunits);
2032 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2033 indices))
2034 return NULL_TREE;
2035 return vect_gen_perm_mask_checked (vectype, indices);
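/* Illustrative example (assuming V8QI, so nunits == 8): the three encoded
   elements are 7, 6, 5, and the single stepped pattern extends them to the
   full reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }.  */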
2038 /* A subroutine of get_load_store_type, with a subset of the same
2039 arguments. Handle the case where STMT_INFO is a load or store that
2040 accesses consecutive elements with a negative step. Sets *POFFSET
2041 to the offset to be applied to the DR for the first access. */
2043 static vect_memory_access_type
2044 get_negative_load_store_type (vec_info *vinfo,
2045 stmt_vec_info stmt_info, tree vectype,
2046 vec_load_store_type vls_type,
2047 unsigned int ncopies, poly_int64 *poffset)
2049 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2050 dr_alignment_support alignment_support_scheme;
2052 if (ncopies > 1)
2054 if (dump_enabled_p ())
2055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2056 "multiple types with negative step.\n");
2057 return VMAT_ELEMENTWISE;
2060 /* For backward running DRs the first access in vectype actually is
2061 N-1 elements before the address of the DR. */
2062 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2063 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2065 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2066 alignment_support_scheme
2067 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2068 if (alignment_support_scheme != dr_aligned
2069 && alignment_support_scheme != dr_unaligned_supported)
2071 if (dump_enabled_p ())
2072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2073 "negative step but alignment required.\n");
2074 *poffset = 0;
2075 return VMAT_ELEMENTWISE;
2078 if (vls_type == VLS_STORE_INVARIANT)
2080 if (dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "negative step with invariant source;"
2083 " no permute needed.\n");
2084 return VMAT_CONTIGUOUS_DOWN;
2087 if (!perm_mask_for_reverse (vectype))
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "negative step and reversing not supported.\n");
2092 *poffset = 0;
2093 return VMAT_ELEMENTWISE;
2096 return VMAT_CONTIGUOUS_REVERSE;
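/* Illustrative example (assuming V4SI and a scalar step of -4 bytes):
   *POFFSET becomes (-4 + 1) * 4 == -12, so the first vector access starts
   three elements before the DR address and the reverse permutation checked
   above restores the scalar element order.  */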
2099 /* STMT_INFO is either a masked or unconditional store. Return the value
2100 being stored. */
2102 tree
2103 vect_get_store_rhs (stmt_vec_info stmt_info)
2105 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2107 gcc_assert (gimple_assign_single_p (assign));
2108 return gimple_assign_rhs1 (assign);
2110 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2112 internal_fn ifn = gimple_call_internal_fn (call);
2113 int index = internal_fn_stored_value_index (ifn);
2114 gcc_assert (index >= 0);
2115 return gimple_call_arg (call, index);
2117 gcc_unreachable ();
2120 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2122 This function returns a vector type which can be composed with NELTS pieces,
2123 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2124 same vector size as the return vector. It first checks whether the target
2125 supports a pieces-sized vector mode for construction; if not, it checks a
2126 pieces-sized scalar mode for construction instead. It returns NULL_TREE if
2127 it fails to find an available composition.
2129 For example, for (vtype=V16QI, nelts=4), we can probably get:
2130 - V16QI with PTYPE V4QI.
2131 - V4SI with PTYPE SI.
2132 - NULL_TREE. */
2134 static tree
2135 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2137 gcc_assert (VECTOR_TYPE_P (vtype));
2138 gcc_assert (known_gt (nelts, 0U));
2140 machine_mode vmode = TYPE_MODE (vtype);
2141 if (!VECTOR_MODE_P (vmode))
2142 return NULL_TREE;
2144 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2145 unsigned int pbsize;
2146 if (constant_multiple_p (vbsize, nelts, &pbsize))
2148 /* First check if vec_init optab supports construction from
2149 vector pieces directly. */
2150 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2151 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2152 machine_mode rmode;
2153 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2154 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2155 != CODE_FOR_nothing))
2157 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2158 return vtype;
2161 /* Otherwise check if there exists an integer type of the same piece size
2162 and if the vec_init optab supports construction from it directly. */
2163 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2164 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2165 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2166 != CODE_FOR_nothing))
2168 *ptype = build_nonstandard_integer_type (pbsize, 1);
2169 return build_vector_type (*ptype, nelts);
2173 return NULL_TREE;
2176 /* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT_INFO is part of a grouped load
2178 or store.
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2185 static bool
2186 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2187 tree vectype, slp_tree slp_node,
2188 bool masked_p, vec_load_store_type vls_type,
2189 vect_memory_access_type *memory_access_type,
2190 poly_int64 *poffset,
2191 dr_alignment_support *alignment_support_scheme,
2192 int *misalignment,
2193 gather_scatter_info *gs_info)
2195 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2196 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2197 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2198 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2199 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2200 bool single_element_p = (stmt_info == first_stmt_info
2201 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2202 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2203 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2205 /* True if the vectorized statements would access beyond the last
2206 statement in the group. */
2207 bool overrun_p = false;
2209 /* True if we can cope with such overrun by peeling for gaps, so that
2210 there is at least one final scalar iteration after the vector loop. */
2211 bool can_overrun_p = (!masked_p
2212 && vls_type == VLS_LOAD
2213 && loop_vinfo
2214 && !loop->inner);
2216 /* There can only be a gap at the end of the group if the stride is
2217 known at compile time. */
2218 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2220 /* Stores can't yet have gaps. */
2221 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2223 if (slp_node)
2225 /* For SLP vectorization we directly vectorize a subchain
2226 without permutation. */
2227 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2228 first_dr_info
2229 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2230 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2232 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2233 separated by the stride, until we have a complete vector.
2234 Fall back to scalar accesses if that isn't possible. */
2235 if (multiple_p (nunits, group_size))
2236 *memory_access_type = VMAT_STRIDED_SLP;
2237 else
2238 *memory_access_type = VMAT_ELEMENTWISE;
2240 else
2242 overrun_p = loop_vinfo && gap != 0;
2243 if (overrun_p && vls_type != VLS_LOAD)
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Grouped store with gaps requires"
2247 " non-consecutive accesses\n");
2248 return false;
2250 /* An overrun is fine if the trailing elements are smaller
2251 than the alignment boundary B. Every vector access will
2252 be a multiple of B and so we are guaranteed to access a
2253 non-gap element in the same B-sized block. */
2254 if (overrun_p
2255 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2256 vectype)
2257 / vect_get_scalar_dr_size (first_dr_info)))
2258 overrun_p = false;
2260 /* If the gap splits the vector in half and the target
2261 can do half-vector operations avoid the epilogue peeling
2262 by simply loading half of the vector only. Usually
2263 the construction with an upper zero half will be elided. */
2264 dr_alignment_support alss;
2265 int misalign = dr_misalignment (first_dr_info, vectype);
2266 tree half_vtype;
2267 if (overrun_p
2268 && !masked_p
2269 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2270 vectype, misalign)))
2271 == dr_aligned
2272 || alss == dr_unaligned_supported)
2273 && known_eq (nunits, (group_size - gap) * 2)
2274 && known_eq (nunits, group_size)
2275 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2276 != NULL_TREE))
2277 overrun_p = false;
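/* Illustrative example (assuming a V4SI load group with group_size == 4 and
   gap == 2): (4 - 2) * 2 == nunits and nunits == group_size, so if
   vector_vector_composition_type finds a two-piece type such as V2SI, only
   the low half needs to be loaded and peeling for gaps can be avoided.  */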
2279 if (overrun_p && !can_overrun_p)
2281 if (dump_enabled_p ())
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2283 "Peeling for outer loop is not supported\n");
2284 return false;
2286 int cmp = compare_step_with_zero (vinfo, stmt_info);
2287 if (cmp < 0)
2289 if (single_element_p)
2290 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2291 only correct for single element "interleaving" SLP. */
2292 *memory_access_type = get_negative_load_store_type
2293 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2294 else
2296 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2297 separated by the stride, until we have a complete vector.
2298 Fall back to scalar accesses if that isn't possible. */
2299 if (multiple_p (nunits, group_size))
2300 *memory_access_type = VMAT_STRIDED_SLP;
2301 else
2302 *memory_access_type = VMAT_ELEMENTWISE;
2305 else
2307 gcc_assert (!loop_vinfo || cmp > 0);
2308 *memory_access_type = VMAT_CONTIGUOUS;
2311 /* When we have a contiguous access across loop iterations
2312 but the access in the loop doesn't cover the full vector
2313 we can end up with no gap recorded but still excess
2314 elements accessed, see PR103116. Make sure we peel for
2315 gaps if necessary and sufficient and give up if not. */
2316 if (loop_vinfo
2317 && *memory_access_type == VMAT_CONTIGUOUS
2318 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2319 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2320 nunits))
2322 unsigned HOST_WIDE_INT cnunits, cvf;
2323 if (!can_overrun_p
2324 || !nunits.is_constant (&cnunits)
2325 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2326 /* Peeling for gaps assumes that a single scalar iteration
2327 is enough to make sure the last vector iteration doesn't
2328 access excess elements.
2329 ??? Enhancements include peeling multiple iterations
2330 or using masked loads with a static mask. */
2331 || (group_size * cvf) % cnunits + group_size < cnunits)
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2335 "peeling for gaps insufficient for "
2336 "access\n");
2337 return false;
2339 overrun_p = true;
2343 else
2345 /* We can always handle this case using elementwise accesses,
2346 but see if something more efficient is available. */
2347 *memory_access_type = VMAT_ELEMENTWISE;
2349 /* If there is a gap at the end of the group then these optimizations
2350 would access excess elements in the last iteration. */
2351 bool would_overrun_p = (gap != 0);
2352 /* An overrun is fine if the trailing elements are smaller than the
2353 alignment boundary B. Every vector access will be a multiple of B
2354 and so we are guaranteed to access a non-gap element in the
2355 same B-sized block. */
2356 if (would_overrun_p
2357 && !masked_p
2358 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2359 / vect_get_scalar_dr_size (first_dr_info)))
2360 would_overrun_p = false;
2362 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2363 && (can_overrun_p || !would_overrun_p)
2364 && compare_step_with_zero (vinfo, stmt_info) > 0)
2366 /* First cope with the degenerate case of a single-element
2367 vector. */
2368 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2371 /* Otherwise try using LOAD/STORE_LANES. */
2372 else if (vls_type == VLS_LOAD
2373 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2374 : vect_store_lanes_supported (vectype, group_size,
2375 masked_p))
2377 *memory_access_type = VMAT_LOAD_STORE_LANES;
2378 overrun_p = would_overrun_p;
2381 /* If that fails, try using permuting loads. */
2382 else if (vls_type == VLS_LOAD
2383 ? vect_grouped_load_supported (vectype, single_element_p,
2384 group_size)
2385 : vect_grouped_store_supported (vectype, group_size))
2387 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2388 overrun_p = would_overrun_p;
2392 /* As a last resort, try using a gather load or scatter store.
2394 ??? Although the code can handle all group sizes correctly,
2395 it probably isn't a win to use separate strided accesses based
2396 on nearby locations. Or, even if it's a win over scalar code,
2397 it might not be a win over vectorizing at a lower VF, if that
2398 allows us to use contiguous accesses. */
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && single_element_p
2401 && loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2407 if (*memory_access_type == VMAT_GATHER_SCATTER
2408 || *memory_access_type == VMAT_ELEMENTWISE)
2410 *alignment_support_scheme = dr_unaligned_supported;
2411 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2413 else
2415 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2416 *alignment_support_scheme
2417 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2418 *misalignment);
2421 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2423 /* STMT is the leader of the group. Check the operands of all the
2424 stmts of the group. */
2425 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2426 while (next_stmt_info)
2428 tree op = vect_get_store_rhs (next_stmt_info);
2429 enum vect_def_type dt;
2430 if (!vect_is_simple_use (op, vinfo, &dt))
2432 if (dump_enabled_p ())
2433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2434 "use not simple.\n");
2435 return false;
2437 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2441 if (overrun_p)
2443 gcc_assert (can_overrun_p);
2444 if (dump_enabled_p ())
2445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2446 "Data access with gaps requires scalar "
2447 "epilogue loop\n");
2448 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2451 return true;
2454 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2455 if there is a memory access type that the vectorized form can use,
2456 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2457 or scatters, fill in GS_INFO accordingly. In addition
2458 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2459 the target does not support the alignment scheme. *MISALIGNMENT
2460 is set according to the alignment of the access (including
2461 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2463 SLP says whether we're performing SLP rather than loop vectorization.
2464 MASKED_P is true if the statement is conditional on a vectorized mask.
2465 VECTYPE is the vector type that the vectorized statements will use.
2466 NCOPIES is the number of vector statements that will be needed. */
2468 static bool
2469 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2470 tree vectype, slp_tree slp_node,
2471 bool masked_p, vec_load_store_type vls_type,
2472 unsigned int ncopies,
2473 vect_memory_access_type *memory_access_type,
2474 poly_int64 *poffset,
2475 dr_alignment_support *alignment_support_scheme,
2476 int *misalignment,
2477 gather_scatter_info *gs_info)
2479 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2480 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2481 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2482 *poffset = 0;
2483 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2485 *memory_access_type = VMAT_GATHER_SCATTER;
2486 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2487 gcc_unreachable ();
2488 /* When using internal functions, we rely on pattern recognition
2489 to convert the type of the offset to the type that the target
2490 requires, with the result being a call to an internal function.
2491 If that failed for some reason (e.g. because another pattern
2492 took priority), just handle cases in which the offset already
2493 has the right type. */
2494 else if (gs_info->ifn != IFN_LAST
2495 && !is_gimple_call (stmt_info->stmt)
2496 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2497 TREE_TYPE (gs_info->offset_vectype)))
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "%s offset requires a conversion\n",
2502 vls_type == VLS_LOAD ? "gather" : "scatter");
2503 return false;
2505 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2506 &gs_info->offset_dt,
2507 &gs_info->offset_vectype))
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2511 "%s index use not simple.\n",
2512 vls_type == VLS_LOAD ? "gather" : "scatter");
2513 return false;
2515 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2517 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2518 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2519 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2520 (gs_info->offset_vectype),
2521 TYPE_VECTOR_SUBPARTS (vectype)))
2523 if (dump_enabled_p ())
2524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2525 "unsupported vector types for emulated "
2526 "gather.\n");
2527 return false;
2530 /* Gather-scatter accesses perform only component accesses; alignment
2531 is irrelevant for them. */
2532 *alignment_support_scheme = dr_unaligned_supported;
2534 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2536 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2537 masked_p,
2538 vls_type, memory_access_type, poffset,
2539 alignment_support_scheme,
2540 misalignment, gs_info))
2541 return false;
2543 else if (STMT_VINFO_STRIDED_P (stmt_info))
2545 gcc_assert (!slp_node);
2546 if (loop_vinfo
2547 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2548 masked_p, gs_info))
2549 *memory_access_type = VMAT_GATHER_SCATTER;
2550 else
2551 *memory_access_type = VMAT_ELEMENTWISE;
2552 /* Alignment is irrelevant here. */
2553 *alignment_support_scheme = dr_unaligned_supported;
2555 else
2557 int cmp = compare_step_with_zero (vinfo, stmt_info);
2558 if (cmp == 0)
2560 gcc_assert (vls_type == VLS_LOAD);
2561 *memory_access_type = VMAT_INVARIANT;
2562 /* Invariant accesses perform only component accesses; alignment
2563 is irrelevant for them. */
2564 *alignment_support_scheme = dr_unaligned_supported;
2566 else
2568 if (cmp < 0)
2569 *memory_access_type = get_negative_load_store_type
2570 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2571 else
2572 *memory_access_type = VMAT_CONTIGUOUS;
2573 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2574 vectype, *poffset);
2575 *alignment_support_scheme
2576 = vect_supportable_dr_alignment (vinfo,
2577 STMT_VINFO_DR_INFO (stmt_info),
2578 vectype, *misalignment);
2582 if ((*memory_access_type == VMAT_ELEMENTWISE
2583 || *memory_access_type == VMAT_STRIDED_SLP)
2584 && !nunits.is_constant ())
2586 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "Not using elementwise accesses due to variable "
2589 "vectorization factor.\n");
2590 return false;
2593 if (*alignment_support_scheme == dr_unaligned_unsupported)
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2597 "unsupported unaligned access\n");
2598 return false;
2601 /* FIXME: At the moment the cost model seems to underestimate the
2602 cost of using elementwise accesses. This check preserves the
2603 traditional behavior until that can be fixed. */
2604 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2605 if (!first_stmt_info)
2606 first_stmt_info = stmt_info;
2607 if (*memory_access_type == VMAT_ELEMENTWISE
2608 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2609 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2610 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2611 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2615 "not falling back to elementwise accesses\n");
2616 return false;
2618 return true;
2621 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2622 conditional operation STMT_INFO. When returning true, store the mask
2623 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2624 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2625 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2627 static bool
2628 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2629 slp_tree slp_node, unsigned mask_index,
2630 tree *mask, slp_tree *mask_node,
2631 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2633 enum vect_def_type mask_dt;
2634 tree mask_vectype;
2635 slp_tree mask_node_1;
2636 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2637 mask, &mask_node_1, &mask_dt, &mask_vectype))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "mask use not simple.\n");
2642 return false;
2645 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2647 if (dump_enabled_p ())
2648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2649 "mask argument is not a boolean.\n");
2650 return false;
2653 /* If the caller is not prepared for adjusting an external/constant
2654 SLP mask vector type fail. */
2655 if (slp_node
2656 && !mask_node
2657 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2659 if (dump_enabled_p ())
2660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2661 "SLP mask argument is not vectorized.\n");
2662 return false;
2665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2666 if (!mask_vectype)
2667 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2669 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2671 if (dump_enabled_p ())
2672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2673 "could not find an appropriate vector mask type.\n");
2674 return false;
2677 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2678 TYPE_VECTOR_SUBPARTS (vectype)))
2680 if (dump_enabled_p ())
2681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2682 "vector mask type %T"
2683 " does not match vector data type %T.\n",
2684 mask_vectype, vectype);
2686 return false;
2689 *mask_dt_out = mask_dt;
2690 *mask_vectype_out = mask_vectype;
2691 if (mask_node)
2692 *mask_node = mask_node_1;
2693 return true;
2696 /* Return true if stored value RHS is suitable for vectorizing store
2697 statement STMT_INFO. When returning true, store the type of the
2698 definition in *RHS_DT_OUT, the type of the vectorized store value in
2699 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2701 static bool
2702 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2703 slp_tree slp_node, tree rhs,
2704 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2705 vec_load_store_type *vls_type_out)
2707 /* In case this is a store from a constant, make sure
2708 native_encode_expr can handle it. */
2709 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2713 "cannot encode constant as a byte sequence.\n");
2714 return false;
2717 unsigned op_no = 0;
2718 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2720 if (gimple_call_internal_p (call)
2721 && internal_store_fn_p (gimple_call_internal_fn (call)))
2722 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2725 enum vect_def_type rhs_dt;
2726 tree rhs_vectype;
2727 slp_tree slp_op;
2728 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2729 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2731 if (dump_enabled_p ())
2732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2733 "use not simple.\n");
2734 return false;
2737 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2738 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2740 if (dump_enabled_p ())
2741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2742 "incompatible vector types.\n");
2743 return false;
2746 *rhs_dt_out = rhs_dt;
2747 *rhs_vectype_out = rhs_vectype;
2748 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2749 *vls_type_out = VLS_STORE_INVARIANT;
2750 else
2751 *vls_type_out = VLS_STORE;
2752 return true;
2755 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2756 Note that we support masks with floating-point type, in which case the
2757 floats are interpreted as a bitmask. */
2759 static tree
2760 vect_build_all_ones_mask (vec_info *vinfo,
2761 stmt_vec_info stmt_info, tree masktype)
2763 if (TREE_CODE (masktype) == INTEGER_TYPE)
2764 return build_int_cst (masktype, -1);
2765 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2767 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2768 mask = build_vector_from_val (masktype, mask);
2769 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2771 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2773 REAL_VALUE_TYPE r;
2774 long tmp[6];
2775 for (int j = 0; j < 6; ++j)
2776 tmp[j] = -1;
2777 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2778 tree mask = build_real (TREE_TYPE (masktype), r);
2779 mask = build_vector_from_val (masktype, mask);
2780 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2782 gcc_unreachable ();
2785 /* Build an all-zero merge value of type VECTYPE while vectorizing
2786 STMT_INFO as a gather load. */
2788 static tree
2789 vect_build_zero_merge_argument (vec_info *vinfo,
2790 stmt_vec_info stmt_info, tree vectype)
2792 tree merge;
2793 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2794 merge = build_int_cst (TREE_TYPE (vectype), 0);
2795 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2797 REAL_VALUE_TYPE r;
2798 long tmp[6];
2799 for (int j = 0; j < 6; ++j)
2800 tmp[j] = 0;
2801 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2802 merge = build_real (TREE_TYPE (vectype), r);
2804 else
2805 gcc_unreachable ();
2806 merge = build_vector_from_val (vectype, merge);
2807 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2810 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2811 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2812 the gather load operation. If the load is conditional, MASK is the
2813 unvectorized condition and MASK_DT is its definition type, otherwise
2814 MASK is null. */
2816 static void
2817 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2818 gimple_stmt_iterator *gsi,
2819 gimple **vec_stmt,
2820 gather_scatter_info *gs_info,
2821 tree mask)
2823 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2824 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2825 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2826 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2827 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2828 edge pe = loop_preheader_edge (loop);
2829 enum { NARROW, NONE, WIDEN } modifier;
2830 poly_uint64 gather_off_nunits
2831 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2833 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2834 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2835 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2836 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2837 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2838 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2839 tree scaletype = TREE_VALUE (arglist);
2840 tree real_masktype = masktype;
2841 gcc_checking_assert (types_compatible_p (srctype, rettype)
2842 && (!mask
2843 || TREE_CODE (masktype) == INTEGER_TYPE
2844 || types_compatible_p (srctype, masktype)));
2845 if (mask)
2846 masktype = truth_type_for (srctype);
2848 tree mask_halftype = masktype;
2849 tree perm_mask = NULL_TREE;
2850 tree mask_perm_mask = NULL_TREE;
2851 if (known_eq (nunits, gather_off_nunits))
2852 modifier = NONE;
2853 else if (known_eq (nunits * 2, gather_off_nunits))
2855 modifier = WIDEN;
2857 /* Currently widening gathers and scatters are only supported for
2858 fixed-length vectors. */
2859 int count = gather_off_nunits.to_constant ();
2860 vec_perm_builder sel (count, count, 1);
2861 for (int i = 0; i < count; ++i)
2862 sel.quick_push (i | (count / 2));
2864 vec_perm_indices indices (sel, 1, count);
2865 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2866 indices);
2868 else if (known_eq (nunits, gather_off_nunits * 2))
2870 modifier = NARROW;
2872 /* Currently narrowing gathers and scatters are only supported for
2873 fixed-length vectors. */
2874 int count = nunits.to_constant ();
2875 vec_perm_builder sel (count, count, 1);
2876 sel.quick_grow (count);
2877 for (int i = 0; i < count; ++i)
2878 sel[i] = i < count / 2 ? i : i + count / 2;
2879 vec_perm_indices indices (sel, 2, count);
2880 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2882 ncopies *= 2;
2884 if (mask && VECTOR_TYPE_P (real_masktype))
2886 for (int i = 0; i < count; ++i)
2887 sel[i] = i | (count / 2);
2888 indices.new_vector (sel, 2, count);
2889 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2891 else if (mask)
2892 mask_halftype = truth_type_for (gs_info->offset_vectype);
2894 else
2895 gcc_unreachable ();
2897 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2898 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2900 tree ptr = fold_convert (ptrtype, gs_info->base);
2901 if (!is_gimple_min_invariant (ptr))
2903 gimple_seq seq;
2904 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2905 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2906 gcc_assert (!new_bb);
2909 tree scale = build_int_cst (scaletype, gs_info->scale);
2911 tree vec_oprnd0 = NULL_TREE;
2912 tree vec_mask = NULL_TREE;
2913 tree src_op = NULL_TREE;
2914 tree mask_op = NULL_TREE;
2915 tree prev_res = NULL_TREE;
2917 if (!mask)
2919 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2920 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2923 auto_vec<tree> vec_oprnds0;
2924 auto_vec<tree> vec_masks;
2925 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2926 modifier == WIDEN ? ncopies / 2 : ncopies,
2927 gs_info->offset, &vec_oprnds0);
2928 if (mask)
2929 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2930 modifier == NARROW ? ncopies / 2 : ncopies,
2931 mask, &vec_masks, masktype);
2932 for (int j = 0; j < ncopies; ++j)
2934 tree op, var;
2935 if (modifier == WIDEN && (j & 1))
2936 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2937 perm_mask, stmt_info, gsi);
2938 else
2939 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2941 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2943 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2944 TYPE_VECTOR_SUBPARTS (idxtype)));
2945 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2946 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2947 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2949 op = var;
2952 if (mask)
2954 if (mask_perm_mask && (j & 1))
2955 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2956 mask_perm_mask, stmt_info, gsi);
2957 else
2959 if (modifier == NARROW)
2961 if ((j & 1) == 0)
2962 vec_mask = vec_masks[j / 2];
2964 else
2965 vec_mask = vec_masks[j];
2967 mask_op = vec_mask;
2968 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2970 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2971 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2972 gcc_assert (known_eq (sub1, sub2));
2973 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2974 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2975 gassign *new_stmt
2976 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2977 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2978 mask_op = var;
2981 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2983 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2984 gassign *new_stmt
2985 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2986 : VEC_UNPACK_LO_EXPR,
2987 mask_op);
2988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2989 mask_op = var;
2991 src_op = mask_op;
2994 tree mask_arg = mask_op;
2995 if (masktype != real_masktype)
2997 tree utype, optype = TREE_TYPE (mask_op);
2998 if (VECTOR_TYPE_P (real_masktype)
2999 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
3000 utype = real_masktype;
3001 else
3002 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3003 var = vect_get_new_ssa_name (utype, vect_scalar_var);
3004 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
3005 gassign *new_stmt
3006 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3008 mask_arg = var;
3009 if (!useless_type_conversion_p (real_masktype, utype))
3011 gcc_assert (TYPE_PRECISION (utype)
3012 <= TYPE_PRECISION (real_masktype));
3013 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
3014 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3015 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3016 mask_arg = var;
3018 src_op = build_zero_cst (srctype);
3020 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3021 mask_arg, scale);
3023 if (!useless_type_conversion_p (vectype, rettype))
3025 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3026 TYPE_VECTOR_SUBPARTS (rettype)));
3027 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3028 gimple_call_set_lhs (new_stmt, op);
3029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3030 var = make_ssa_name (vec_dest);
3031 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3032 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3033 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3035 else
3037 var = make_ssa_name (vec_dest, new_stmt);
3038 gimple_call_set_lhs (new_stmt, var);
3039 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3042 if (modifier == NARROW)
3044 if ((j & 1) == 0)
3046 prev_res = var;
3047 continue;
3049 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3050 stmt_info, gsi);
3051 new_stmt = SSA_NAME_DEF_STMT (var);
3054 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3056 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3059 /* Prepare the base and offset in GS_INFO for vectorization.
3060 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3061 to the vectorized offset argument for the first copy of STMT_INFO.
3062 STMT_INFO is the statement described by GS_INFO and LOOP is the
3063 containing loop. */
3065 static void
3066 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3067 class loop *loop, stmt_vec_info stmt_info,
3068 slp_tree slp_node, gather_scatter_info *gs_info,
3069 tree *dataref_ptr, vec<tree> *vec_offset)
3071 gimple_seq stmts = NULL;
3072 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3073 if (stmts != NULL)
3075 basic_block new_bb;
3076 edge pe = loop_preheader_edge (loop);
3077 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3078 gcc_assert (!new_bb);
3080 if (slp_node)
3081 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3082 else
3084 unsigned ncopies
3085 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3086 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3087 gs_info->offset, vec_offset,
3088 gs_info->offset_vectype);
3092 /* Prepare to implement a grouped or strided load or store using
3093 the gather load or scatter store operation described by GS_INFO.
3094 STMT_INFO is the load or store statement.
3096 Set *DATAREF_BUMP to the amount that should be added to the base
3097 address after each copy of the vectorized statement. Set *VEC_OFFSET
3098 to an invariant offset vector in which element I has the value
3099 I * DR_STEP / SCALE. */
3101 static void
3102 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3103 loop_vec_info loop_vinfo,
3104 gather_scatter_info *gs_info,
3105 tree *dataref_bump, tree *vec_offset)
3107 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3108 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3110 tree bump = size_binop (MULT_EXPR,
3111 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3112 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3113 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3115 /* The offset given in GS_INFO can have pointer type, so use the element
3116 type of the vector instead. */
3117 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3119 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3120 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3121 ssize_int (gs_info->scale));
3122 step = fold_convert (offset_type, step);
3124 /* Create {0, X, X*2, X*3, ...}. */
3125 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3126 build_zero_cst (offset_type), step);
3127 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
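/* Illustrative example (assuming DR_STEP == 32, V4SI data, SCALE == 4 and a
   four-element offset vector): *DATAREF_BUMP is 32 * 4 == 128 bytes per copy,
   X is 32 / 4 == 8 and *VEC_OFFSET is the invariant series { 0, 8, 16, 24 }.  */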
3130 /* Return the amount that should be added to a vector pointer to move
3131 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3132 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3133 vectorization. */
3135 static tree
3136 vect_get_data_ptr_increment (vec_info *vinfo,
3137 dr_vec_info *dr_info, tree aggr_type,
3138 vect_memory_access_type memory_access_type)
3140 if (memory_access_type == VMAT_INVARIANT)
3141 return size_zero_node;
3143 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3144 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3145 if (tree_int_cst_sgn (step) == -1)
3146 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3147 return iv_step;
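/* Illustrative example (assuming AGGR_TYPE is a 16-byte vector type and the
   DR step is negative): the increment is -16, so each copy of the vectorized
   access moves the data pointer back by one full vector.  */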
3150 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3152 static bool
3153 vectorizable_bswap (vec_info *vinfo,
3154 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3155 gimple **vec_stmt, slp_tree slp_node,
3156 slp_tree *slp_op,
3157 tree vectype_in, stmt_vector_for_cost *cost_vec)
3159 tree op, vectype;
3160 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3161 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3162 unsigned ncopies;
3164 op = gimple_call_arg (stmt, 0);
3165 vectype = STMT_VINFO_VECTYPE (stmt_info);
3166 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3168 /* Multiple types in SLP are handled by creating the appropriate number of
3169 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3170 case of SLP. */
3171 if (slp_node)
3172 ncopies = 1;
3173 else
3174 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3176 gcc_assert (ncopies >= 1);
3178 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3179 if (! char_vectype)
3180 return false;
3182 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3183 unsigned word_bytes;
3184 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3185 return false;
3187 /* The encoding uses one stepped pattern for each byte in the word. */
3188 vec_perm_builder elts (num_bytes, word_bytes, 3);
3189 for (unsigned i = 0; i < 3; ++i)
3190 for (unsigned j = 0; j < word_bytes; ++j)
3191 elts.quick_push ((i + 1) * word_bytes - j - 1);
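/* Illustrative example (assuming a bswap32 on V4SI data, so word_bytes == 4
   and num_bytes == 16): the three groups pushed above are
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the stepped encoding
   extends to the full byte-reversing selector.  */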
3193 vec_perm_indices indices (elts, 1, num_bytes);
3194 machine_mode vmode = TYPE_MODE (char_vectype);
3195 if (!can_vec_perm_const_p (vmode, vmode, indices))
3196 return false;
3198 if (! vec_stmt)
3200 if (slp_node
3201 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3203 if (dump_enabled_p ())
3204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3205 "incompatible vector types for invariants\n");
3206 return false;
3209 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3210 DUMP_VECT_SCOPE ("vectorizable_bswap");
3211 record_stmt_cost (cost_vec,
3212 1, vector_stmt, stmt_info, 0, vect_prologue);
3213 record_stmt_cost (cost_vec,
3214 slp_node
3215 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3216 vec_perm, stmt_info, 0, vect_body);
3217 return true;
3220 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3222 /* Transform. */
3223 vec<tree> vec_oprnds = vNULL;
3224 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3225 op, &vec_oprnds);
3226 /* Arguments are ready. Create the new vector stmt. */
3227 unsigned i;
3228 tree vop;
3229 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3231 gimple *new_stmt;
3232 tree tem = make_ssa_name (char_vectype);
3233 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3234 char_vectype, vop));
3235 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3236 tree tem2 = make_ssa_name (char_vectype);
3237 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3238 tem, tem, bswap_vconst);
3239 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3240 tem = make_ssa_name (vectype);
3241 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3242 vectype, tem2));
3243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3244 if (slp_node)
3245 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3246 else
3247 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3250 if (!slp_node)
3251 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3253 vec_oprnds.release ();
3254 return true;
3257 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3258 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3259 in a single step. On success, store the binary pack code in
3260 *CONVERT_CODE. */
3262 static bool
3263 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3264 tree_code *convert_code)
3266 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3267 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3268 return false;
3270 tree_code code;
3271 int multi_step_cvt = 0;
3272 auto_vec <tree, 8> interm_types;
3273 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3274 &code, &multi_step_cvt, &interm_types)
3275 || multi_step_cvt)
3276 return false;
3278 *convert_code = code;
3279 return true;
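/* Illustrative example (assuming V4DI inputs narrowed to V8SI results):
   supportable_narrowing_operation can typically express this as a single
   VEC_PACK_TRUNC_EXPR, which then becomes *CONVERT_CODE; anything needing an
   intermediate step is rejected here.  */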
3282 /* Function vectorizable_call.
3284 Check if STMT_INFO performs a function call that can be vectorized.
3285 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3286 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3287 Return true if STMT_INFO is vectorizable in this way. */
3289 static bool
3290 vectorizable_call (vec_info *vinfo,
3291 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3292 gimple **vec_stmt, slp_tree slp_node,
3293 stmt_vector_for_cost *cost_vec)
3295 gcall *stmt;
3296 tree vec_dest;
3297 tree scalar_dest;
3298 tree op;
3299 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3300 tree vectype_out, vectype_in;
3301 poly_uint64 nunits_in;
3302 poly_uint64 nunits_out;
3303 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3304 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3305 tree fndecl, new_temp, rhs_type;
3306 enum vect_def_type dt[4]
3307 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3308 vect_unknown_def_type };
3309 tree vectypes[ARRAY_SIZE (dt)] = {};
3310 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3311 int ndts = ARRAY_SIZE (dt);
3312 int ncopies, j;
3313 auto_vec<tree, 8> vargs;
3314 enum { NARROW, NONE, WIDEN } modifier;
3315 size_t i, nargs;
3316 tree lhs;
3318 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3319 return false;
3321 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3322 && ! vec_stmt)
3323 return false;
3325 /* Is STMT_INFO a vectorizable call? */
3326 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3327 if (!stmt)
3328 return false;
3330 if (gimple_call_internal_p (stmt)
3331 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3332 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3333 /* Handled by vectorizable_load and vectorizable_store. */
3334 return false;
3336 if (gimple_call_lhs (stmt) == NULL_TREE
3337 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3338 return false;
3340 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3342 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3344 /* Process function arguments. */
3345 rhs_type = NULL_TREE;
3346 vectype_in = NULL_TREE;
3347 nargs = gimple_call_num_args (stmt);
3349 /* Bail out if the function has more than four arguments; we do not have
3350 interesting builtin functions to vectorize with more than two arguments
3351 except for fma. Having no arguments is not good either. */
3352 if (nargs == 0 || nargs > 4)
3353 return false;
3355 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3356 combined_fn cfn = gimple_call_combined_fn (stmt);
3357 if (cfn == CFN_GOMP_SIMD_LANE)
3359 nargs = 0;
3360 rhs_type = unsigned_type_node;
3363 int mask_opno = -1;
3364 if (internal_fn_p (cfn))
3365 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3367 for (i = 0; i < nargs; i++)
3369 if ((int) i == mask_opno)
3371 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3372 &op, &slp_op[i], &dt[i], &vectypes[i]))
3373 return false;
3374 continue;
3377 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3378 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3380 if (dump_enabled_p ())
3381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3382 "use not simple.\n");
3383 return false;
3386 /* We can only handle calls with arguments of the same type. */
3387 if (rhs_type
3388 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3390 if (dump_enabled_p ())
3391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3392 "argument types differ.\n");
3393 return false;
3395 if (!rhs_type)
3396 rhs_type = TREE_TYPE (op);
3398 if (!vectype_in)
3399 vectype_in = vectypes[i];
3400 else if (vectypes[i]
3401 && !types_compatible_p (vectypes[i], vectype_in))
3403 if (dump_enabled_p ())
3404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3405 "argument vector types differ.\n");
3406 return false;
3409 /* If all arguments are external or constant defs, infer the vector type
3410 from the scalar type. */
3411 if (!vectype_in)
3412 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3413 if (vec_stmt)
3414 gcc_assert (vectype_in);
3415 if (!vectype_in)
3417 if (dump_enabled_p ())
3418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3419 "no vectype for scalar type %T\n", rhs_type);
3421 return false;
3423 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3424 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3425 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3426 by a pack of the two vectors into an SI vector. We would need
3427 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3428 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3430 if (dump_enabled_p ())
3431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3432 "mismatched vector sizes %T and %T\n",
3433 vectype_in, vectype_out);
3434 return false;
3437 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3438 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3440 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3442 "mixed mask and nonmask vector types\n");
3443 return false;
3446 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3448 if (dump_enabled_p ())
3449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3450 "use emulated vector type for call\n");
3451 return false;
3454 /* FORNOW */
3455 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3456 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3457 if (known_eq (nunits_in * 2, nunits_out))
3458 modifier = NARROW;
3459 else if (known_eq (nunits_out, nunits_in))
3460 modifier = NONE;
3461 else if (known_eq (nunits_out * 2, nunits_in))
3462 modifier = WIDEN;
3463 else
3464 return false;
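/* Worked example (illustrative, not from the original source): with
   128-bit vectors, a call taking V2DI arguments and producing a V4SI
   result has nunits_in == 2 and nunits_out == 4 and is classified as
   NARROW (two results of the input vector type get packed into one
   output vector); V4SF -> V4SI is NONE; V4SI -> V2DI is WIDEN.
   Anything else is rejected above.  */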
3466 /* We only handle functions that do not read or clobber memory. */
3467 if (gimple_vuse (stmt))
3469 if (dump_enabled_p ())
3470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3471 "function reads from or writes to memory.\n");
3472 return false;
3475 /* For now, we only vectorize functions if a target specific builtin
3476 is available. TODO -- in some cases, it might be profitable to
3477 insert the calls for pieces of the vector, in order to be able
3478 to vectorize other operations in the loop. */
3479 fndecl = NULL_TREE;
3480 internal_fn ifn = IFN_LAST;
3481 tree callee = gimple_call_fndecl (stmt);
3483 /* First try using an internal function. */
3484 tree_code convert_code = ERROR_MARK;
3485 if (cfn != CFN_LAST
3486 && (modifier == NONE
3487 || (modifier == NARROW
3488 && simple_integer_narrowing (vectype_out, vectype_in,
3489 &convert_code))))
3490 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3491 vectype_in);
3493 /* If that fails, try asking for a target-specific built-in function. */
3494 if (ifn == IFN_LAST)
3496 if (cfn != CFN_LAST)
3497 fndecl = targetm.vectorize.builtin_vectorized_function
3498 (cfn, vectype_out, vectype_in);
3499 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3500 fndecl = targetm.vectorize.builtin_md_vectorized_function
3501 (callee, vectype_out, vectype_in);
3504 if (ifn == IFN_LAST && !fndecl)
3506 if (cfn == CFN_GOMP_SIMD_LANE
3507 && !slp_node
3508 && loop_vinfo
3509 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3510 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3511 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3512 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3514 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3515 { 0, 1, 2, ... vf - 1 } vector. */
3516 gcc_assert (nargs == 0);
3518 else if (modifier == NONE
3519 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3520 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3521 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3522 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3523 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3524 slp_op, vectype_in, cost_vec);
3525 else
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3529 "function is not vectorizable.\n");
3530 return false;
3534 if (slp_node)
3535 ncopies = 1;
3536 else if (modifier == NARROW && ifn == IFN_LAST)
3537 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3538 else
3539 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3541 /* Sanity check: make sure that at least one copy of the vectorized stmt
3542 needs to be generated. */
3543 gcc_assert (ncopies >= 1);
3545 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3546 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3547 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3548 if (!vec_stmt) /* transformation not required. */
3550 if (slp_node)
3551 for (i = 0; i < nargs; ++i)
3552 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3553 vectypes[i]
3554 ? vectypes[i] : vectype_in))
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "incompatible vector types for invariants\n");
3559 return false;
3561 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3562 DUMP_VECT_SCOPE ("vectorizable_call");
3563 vect_model_simple_cost (vinfo, stmt_info,
3564 ncopies, dt, ndts, slp_node, cost_vec);
3565 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3566 record_stmt_cost (cost_vec, ncopies / 2,
3567 vec_promote_demote, stmt_info, 0, vect_body);
3569 if (loop_vinfo
3570 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3571 && (reduc_idx >= 0 || mask_opno >= 0))
3573 if (reduc_idx >= 0
3574 && (cond_fn == IFN_LAST
3575 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3576 OPTIMIZE_FOR_SPEED)))
3578 if (dump_enabled_p ())
3579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3580 "can't use a fully-masked loop because no"
3581 " conditional operation is available.\n");
3582 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3584 else
3586 unsigned int nvectors
3587 = (slp_node
3588 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3589 : ncopies);
3590 tree scalar_mask = NULL_TREE;
3591 if (mask_opno >= 0)
3592 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3593 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3594 vectype_out, scalar_mask);
3597 return true;
3600 /* Transform. */
3602 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3605 /* Handle def. */
3606 scalar_dest = gimple_call_lhs (stmt);
3607 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3609 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3610 unsigned int vect_nargs = nargs;
3611 if (masked_loop_p && reduc_idx >= 0)
3613 ifn = cond_fn;
3614 vect_nargs += 2;
3617 if (modifier == NONE || ifn != IFN_LAST)
3619 tree prev_res = NULL_TREE;
3620 vargs.safe_grow (vect_nargs, true);
3621 auto_vec<vec<tree> > vec_defs (nargs);
3622 for (j = 0; j < ncopies; ++j)
3624 /* Build argument list for the vectorized call. */
3625 if (slp_node)
3627 vec<tree> vec_oprnds0;
3629 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3630 vec_oprnds0 = vec_defs[0];
3632 /* Arguments are ready. Create the new vector stmt. */
3633 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3635 int varg = 0;
3636 if (masked_loop_p && reduc_idx >= 0)
3638 unsigned int vec_num = vec_oprnds0.length ();
3639 /* Always true for SLP. */
3640 gcc_assert (ncopies == 1);
3641 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3642 vectype_out, i);
3644 size_t k;
3645 for (k = 0; k < nargs; k++)
3647 vec<tree> vec_oprndsk = vec_defs[k];
3648 vargs[varg++] = vec_oprndsk[i];
3650 if (masked_loop_p && reduc_idx >= 0)
3651 vargs[varg++] = vargs[reduc_idx + 1];
3652 gimple *new_stmt;
3653 if (modifier == NARROW)
3655 /* We don't define any narrowing conditional functions
3656 at present. */
3657 gcc_assert (mask_opno < 0);
3658 tree half_res = make_ssa_name (vectype_in);
3659 gcall *call
3660 = gimple_build_call_internal_vec (ifn, vargs);
3661 gimple_call_set_lhs (call, half_res);
3662 gimple_call_set_nothrow (call, true);
3663 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3664 if ((i & 1) == 0)
3666 prev_res = half_res;
3667 continue;
3669 new_temp = make_ssa_name (vec_dest);
3670 new_stmt = gimple_build_assign (new_temp, convert_code,
3671 prev_res, half_res);
3672 vect_finish_stmt_generation (vinfo, stmt_info,
3673 new_stmt, gsi);
3675 else
3677 if (mask_opno >= 0 && masked_loop_p)
3679 unsigned int vec_num = vec_oprnds0.length ();
3680 /* Always true for SLP. */
3681 gcc_assert (ncopies == 1);
3682 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3683 vectype_out, i);
3684 vargs[mask_opno] = prepare_vec_mask
3685 (loop_vinfo, TREE_TYPE (mask), mask,
3686 vargs[mask_opno], gsi);
3689 gcall *call;
3690 if (ifn != IFN_LAST)
3691 call = gimple_build_call_internal_vec (ifn, vargs);
3692 else
3693 call = gimple_build_call_vec (fndecl, vargs);
3694 new_temp = make_ssa_name (vec_dest, call);
3695 gimple_call_set_lhs (call, new_temp);
3696 gimple_call_set_nothrow (call, true);
3697 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3698 new_stmt = call;
3700 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3702 continue;
3705 int varg = 0;
3706 if (masked_loop_p && reduc_idx >= 0)
3707 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3708 vectype_out, j);
3709 for (i = 0; i < nargs; i++)
3711 op = gimple_call_arg (stmt, i);
3712 if (j == 0)
3714 vec_defs.quick_push (vNULL);
3715 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3716 op, &vec_defs[i],
3717 vectypes[i]);
3719 vargs[varg++] = vec_defs[i][j];
3721 if (masked_loop_p && reduc_idx >= 0)
3722 vargs[varg++] = vargs[reduc_idx + 1];
3724 if (mask_opno >= 0 && masked_loop_p)
3726 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3727 vectype_out, j);
3728 vargs[mask_opno]
3729 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3730 vargs[mask_opno], gsi);
3733 gimple *new_stmt;
3734 if (cfn == CFN_GOMP_SIMD_LANE)
3736 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3737 tree new_var
3738 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3739 gimple *init_stmt = gimple_build_assign (new_var, cst);
3740 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3741 new_temp = make_ssa_name (vec_dest);
3742 new_stmt = gimple_build_assign (new_temp, new_var);
3743 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3745 else if (modifier == NARROW)
3747 /* We don't define any narrowing conditional functions at
3748 present. */
3749 gcc_assert (mask_opno < 0);
3750 tree half_res = make_ssa_name (vectype_in);
3751 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3752 gimple_call_set_lhs (call, half_res);
3753 gimple_call_set_nothrow (call, true);
3754 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3755 if ((j & 1) == 0)
3757 prev_res = half_res;
3758 continue;
3760 new_temp = make_ssa_name (vec_dest);
3761 new_stmt = gimple_build_assign (new_temp, convert_code,
3762 prev_res, half_res);
3763 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3765 else
3767 gcall *call;
3768 if (ifn != IFN_LAST)
3769 call = gimple_build_call_internal_vec (ifn, vargs);
3770 else
3771 call = gimple_build_call_vec (fndecl, vargs);
3772 new_temp = make_ssa_name (vec_dest, call);
3773 gimple_call_set_lhs (call, new_temp);
3774 gimple_call_set_nothrow (call, true);
3775 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3776 new_stmt = call;
3779 if (j == (modifier == NARROW ? 1 : 0))
3780 *vec_stmt = new_stmt;
3781 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3783 for (i = 0; i < nargs; i++)
3785 vec<tree> vec_oprndsi = vec_defs[i];
3786 vec_oprndsi.release ();
3789 else if (modifier == NARROW)
3791 auto_vec<vec<tree> > vec_defs (nargs);
3792 /* We don't define any narrowing conditional functions at present. */
3793 gcc_assert (mask_opno < 0);
3794 for (j = 0; j < ncopies; ++j)
3796 /* Build argument list for the vectorized call. */
3797 if (j == 0)
3798 vargs.create (nargs * 2);
3799 else
3800 vargs.truncate (0);
3802 if (slp_node)
3804 vec<tree> vec_oprnds0;
3806 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3807 vec_oprnds0 = vec_defs[0];
3809 /* Arguments are ready. Create the new vector stmt. */
3810 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3812 size_t k;
3813 vargs.truncate (0);
3814 for (k = 0; k < nargs; k++)
3816 vec<tree> vec_oprndsk = vec_defs[k];
3817 vargs.quick_push (vec_oprndsk[i]);
3818 vargs.quick_push (vec_oprndsk[i + 1]);
3820 gcall *call;
3821 if (ifn != IFN_LAST)
3822 call = gimple_build_call_internal_vec (ifn, vargs);
3823 else
3824 call = gimple_build_call_vec (fndecl, vargs);
3825 new_temp = make_ssa_name (vec_dest, call);
3826 gimple_call_set_lhs (call, new_temp);
3827 gimple_call_set_nothrow (call, true);
3828 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3829 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3831 continue;
3834 for (i = 0; i < nargs; i++)
3836 op = gimple_call_arg (stmt, i);
3837 if (j == 0)
3839 vec_defs.quick_push (vNULL);
3840 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3841 op, &vec_defs[i], vectypes[i]);
3843 vec_oprnd0 = vec_defs[i][2*j];
3844 vec_oprnd1 = vec_defs[i][2*j+1];
3846 vargs.quick_push (vec_oprnd0);
3847 vargs.quick_push (vec_oprnd1);
3850 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3851 new_temp = make_ssa_name (vec_dest, new_stmt);
3852 gimple_call_set_lhs (new_stmt, new_temp);
3853 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3855 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3858 if (!slp_node)
3859 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3861 for (i = 0; i < nargs; i++)
3863 vec<tree> vec_oprndsi = vec_defs[i];
3864 vec_oprndsi.release ();
3867 else
3868 /* No current target implements this case. */
3869 return false;
3871 vargs.release ();
3873 /* The call in STMT might prevent it from being removed in DCE.
3874 We cannot remove it here, however, because of the way the SSA name
3875 it defines is mapped to the new definition. So just replace the
3876 RHS of the statement with something harmless. */
3878 if (slp_node)
3879 return true;
3881 stmt_info = vect_orig_stmt (stmt_info);
3882 lhs = gimple_get_lhs (stmt_info->stmt);
3884 gassign *new_stmt
3885 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3886 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3888 return true;
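/* Illustrative sketch (not part of the original file): the kind of scalar
   loop vectorizable_call handles.  Assuming -fno-math-errno so that the
   call has no virtual operands, and a target with a vector square-root
   instruction, each group of lanes below becomes one vector IFN_SQRT (or
   target builtin) call, and the original scalar call is finally replaced
   by a harmless zero assignment as described in the comment above.  */

void
example_sqrt_loop (float *dst, const float *src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = __builtin_sqrtf (src[i]);	/* vectorizable call */
}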
3892 struct simd_call_arg_info
3894 tree vectype;
3895 tree op;
3896 HOST_WIDE_INT linear_step;
3897 enum vect_def_type dt;
3898 unsigned int align;
3899 bool simd_lane_linear;
3902 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3903 is linear within a simd lane (but not within the whole loop), note it
3904 in *ARGINFO. */
3906 static void
3907 vect_simd_lane_linear (tree op, class loop *loop,
3908 struct simd_call_arg_info *arginfo)
3910 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3912 if (!is_gimple_assign (def_stmt)
3913 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3914 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3915 return;
3917 tree base = gimple_assign_rhs1 (def_stmt);
3918 HOST_WIDE_INT linear_step = 0;
3919 tree v = gimple_assign_rhs2 (def_stmt);
3920 while (TREE_CODE (v) == SSA_NAME)
3922 tree t;
3923 def_stmt = SSA_NAME_DEF_STMT (v);
3924 if (is_gimple_assign (def_stmt))
3925 switch (gimple_assign_rhs_code (def_stmt))
3927 case PLUS_EXPR:
3928 t = gimple_assign_rhs2 (def_stmt);
3929 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3930 return;
3931 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3932 v = gimple_assign_rhs1 (def_stmt);
3933 continue;
3934 case MULT_EXPR:
3935 t = gimple_assign_rhs2 (def_stmt);
3936 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3937 return;
3938 linear_step = tree_to_shwi (t);
3939 v = gimple_assign_rhs1 (def_stmt);
3940 continue;
3941 CASE_CONVERT:
3942 t = gimple_assign_rhs1 (def_stmt);
3943 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3944 || (TYPE_PRECISION (TREE_TYPE (v))
3945 < TYPE_PRECISION (TREE_TYPE (t))))
3946 return;
3947 if (!linear_step)
3948 linear_step = 1;
3949 v = t;
3950 continue;
3951 default:
3952 return;
3954 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3955 && loop->simduid
3956 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3957 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3958 == loop->simduid))
3960 if (!linear_step)
3961 linear_step = 1;
3962 arginfo->linear_step = linear_step;
3963 arginfo->op = base;
3964 arginfo->simd_lane_linear = true;
3965 return;
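/* Stand-alone sketch (illustrative, not part of the original file): the
   walk above strips constant PLUS, constant MULT and conversion wrappers
   off the offset of a POINTER_PLUS_EXPR and succeeds if it bottoms out at
   the IFN_GOMP_SIMD_LANE lane id, i.e. it recognises addresses of the
   form BASE + LANE * STEP (+ CST).  A loose restatement over a toy
   expression tree (hypothetical types; the precision check on
   conversions is omitted):  */

struct toy_expr
{
  enum kind_t { LANE, CST, PLUS, MULT, CAST } kind;
  long value;			/* for CST */
  const toy_expr *op0, *op1;	/* children; op1 unused for CAST/LANE */
};

/* Return true and set *STEP if E is (a cast of) LANE, LANE * CST or
   LANE * CST + CST, mirroring vect_simd_lane_linear above.  */
static bool
toy_simd_lane_linear (const toy_expr *e, long *step)
{
  long s = 0;
  while (true)
    switch (e->kind)
      {
      case toy_expr::PLUS:
	if (s != 0 || e->op1->kind != toy_expr::CST)
	  return false;
	e = e->op0;		/* the constant is folded into the base */
	break;
      case toy_expr::MULT:
	if (s != 0 || e->op1->kind != toy_expr::CST || e->op1->value == 0)
	  return false;
	s = e->op1->value;
	e = e->op0;
	break;
      case toy_expr::CAST:
	if (s == 0)
	  s = 1;
	e = e->op0;
	break;
      case toy_expr::LANE:
	*step = s ? s : 1;
	return true;
      default:			/* plain constants etc. are not linear */
	return false;
      }
}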
3970 /* Return the number of elements in vector type VECTYPE, which is associated
3971 with a SIMD clone. At present these vectors always have a constant
3972 length. */
3974 static unsigned HOST_WIDE_INT
3975 simd_clone_subparts (tree vectype)
3977 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3980 /* Function vectorizable_simd_clone_call.
3982 Check if STMT_INFO performs a function call that can be vectorized
3983 by calling a simd clone of the function.
3984 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3985 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3986 Return true if STMT_INFO is vectorizable in this way. */
3988 static bool
3989 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3990 gimple_stmt_iterator *gsi,
3991 gimple **vec_stmt, slp_tree slp_node,
3992 stmt_vector_for_cost *)
3994 tree vec_dest;
3995 tree scalar_dest;
3996 tree op, type;
3997 tree vec_oprnd0 = NULL_TREE;
3998 tree vectype;
3999 poly_uint64 nunits;
4000 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4001 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4002 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4003 tree fndecl, new_temp;
4004 int ncopies, j;
4005 auto_vec<simd_call_arg_info> arginfo;
4006 vec<tree> vargs = vNULL;
4007 size_t i, nargs;
4008 tree lhs, rtype, ratype;
4009 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4010 int arg_offset = 0;
4012 /* Is STMT a vectorizable call? */
4013 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4014 if (!stmt)
4015 return false;
4017 fndecl = gimple_call_fndecl (stmt);
4018 if (fndecl == NULL_TREE
4019 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4021 fndecl = gimple_call_arg (stmt, 0);
4022 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4023 fndecl = TREE_OPERAND (fndecl, 0);
4024 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4025 arg_offset = 1;
4027 if (fndecl == NULL_TREE)
4028 return false;
4030 struct cgraph_node *node = cgraph_node::get (fndecl);
4031 if (node == NULL || node->simd_clones == NULL)
4032 return false;
4034 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4035 return false;
4037 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4038 && ! vec_stmt)
4039 return false;
4041 if (gimple_call_lhs (stmt)
4042 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4043 return false;
4045 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4047 vectype = STMT_VINFO_VECTYPE (stmt_info);
4049 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4050 return false;
4052 /* FORNOW */
4053 if (slp_node)
4054 return false;
4056 /* Process function arguments. */
4057 nargs = gimple_call_num_args (stmt) - arg_offset;
4059 /* Bail out if the function has zero arguments. */
4060 if (nargs == 0)
4061 return false;
4063 arginfo.reserve (nargs, true);
4065 for (i = 0; i < nargs; i++)
4067 simd_call_arg_info thisarginfo;
4068 affine_iv iv;
4070 thisarginfo.linear_step = 0;
4071 thisarginfo.align = 0;
4072 thisarginfo.op = NULL_TREE;
4073 thisarginfo.simd_lane_linear = false;
4075 op = gimple_call_arg (stmt, i + arg_offset);
4076 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4077 &thisarginfo.vectype)
4078 || thisarginfo.dt == vect_uninitialized_def)
4080 if (dump_enabled_p ())
4081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4082 "use not simple.\n");
4083 return false;
4086 if (thisarginfo.dt == vect_constant_def
4087 || thisarginfo.dt == vect_external_def)
4088 gcc_assert (thisarginfo.vectype == NULL_TREE);
4089 else
4090 gcc_assert (thisarginfo.vectype != NULL_TREE);
4092 /* For linear arguments, the analysis phase should have saved
4093 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4094 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4095 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4097 gcc_assert (vec_stmt);
4098 thisarginfo.linear_step
4099 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4100 thisarginfo.op
4101 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4102 thisarginfo.simd_lane_linear
4103 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4104 == boolean_true_node);
4105 /* If the loop has been peeled for alignment, we need to adjust it. */
4106 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4107 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4108 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4110 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4111 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4112 tree opt = TREE_TYPE (thisarginfo.op);
4113 bias = fold_convert (TREE_TYPE (step), bias);
4114 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4115 thisarginfo.op
4116 = fold_build2 (POINTER_TYPE_P (opt)
4117 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4118 thisarginfo.op, bias);
4121 else if (!vec_stmt
4122 && thisarginfo.dt != vect_constant_def
4123 && thisarginfo.dt != vect_external_def
4124 && loop_vinfo
4125 && TREE_CODE (op) == SSA_NAME
4126 && simple_iv (loop, loop_containing_stmt (stmt), op,
4127 &iv, false)
4128 && tree_fits_shwi_p (iv.step))
4130 thisarginfo.linear_step = tree_to_shwi (iv.step);
4131 thisarginfo.op = iv.base;
4133 else if ((thisarginfo.dt == vect_constant_def
4134 || thisarginfo.dt == vect_external_def)
4135 && POINTER_TYPE_P (TREE_TYPE (op)))
4136 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4137 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4138 linear too. */
4139 if (POINTER_TYPE_P (TREE_TYPE (op))
4140 && !thisarginfo.linear_step
4141 && !vec_stmt
4142 && thisarginfo.dt != vect_constant_def
4143 && thisarginfo.dt != vect_external_def
4144 && loop_vinfo
4145 && !slp_node
4146 && TREE_CODE (op) == SSA_NAME)
4147 vect_simd_lane_linear (op, loop, &thisarginfo);
4149 arginfo.quick_push (thisarginfo);
4152 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4153 if (!vf.is_constant ())
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4157 "not considering SIMD clones; not yet supported"
4158 " for variable-width vectors.\n");
4159 return false;
4162 unsigned int badness = 0;
4163 struct cgraph_node *bestn = NULL;
4164 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4165 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4166 else
4167 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4168 n = n->simdclone->next_clone)
4170 unsigned int this_badness = 0;
4171 unsigned int num_calls;
4172 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4173 || n->simdclone->nargs != nargs)
4174 continue;
4175 if (num_calls != 1)
4176 this_badness += exact_log2 (num_calls) * 4096;
4177 if (n->simdclone->inbranch)
4178 this_badness += 8192;
4179 int target_badness = targetm.simd_clone.usable (n);
4180 if (target_badness < 0)
4181 continue;
4182 this_badness += target_badness * 512;
4183 for (i = 0; i < nargs; i++)
4185 switch (n->simdclone->args[i].arg_type)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR:
4188 if (!useless_type_conversion_p
4189 (n->simdclone->args[i].orig_type,
4190 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4191 i = -1;
4192 else if (arginfo[i].dt == vect_constant_def
4193 || arginfo[i].dt == vect_external_def
4194 || arginfo[i].linear_step)
4195 this_badness += 64;
4196 break;
4197 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4198 if (arginfo[i].dt != vect_constant_def
4199 && arginfo[i].dt != vect_external_def)
4200 i = -1;
4201 break;
4202 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4203 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4204 if (arginfo[i].dt == vect_constant_def
4205 || arginfo[i].dt == vect_external_def
4206 || (arginfo[i].linear_step
4207 != n->simdclone->args[i].linear_step))
4208 i = -1;
4209 break;
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4213 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4216 /* FORNOW */
4217 i = -1;
4218 break;
4219 case SIMD_CLONE_ARG_TYPE_MASK:
4220 break;
4222 if (i == (size_t) -1)
4223 break;
4224 if (n->simdclone->args[i].alignment > arginfo[i].align)
4226 i = -1;
4227 break;
4229 if (arginfo[i].align)
4230 this_badness += (exact_log2 (arginfo[i].align)
4231 - exact_log2 (n->simdclone->args[i].alignment));
4233 if (i == (size_t) -1)
4234 continue;
4235 if (bestn == NULL || this_badness < badness)
4237 bestn = n;
4238 badness = this_badness;
4242 if (bestn == NULL)
4243 return false;
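/* Worked example (illustrative, not from the original source): with
   vf == 8, a not-inbranch clone of simdlen 8 needs a single call and
   scores 0, whereas an inbranch clone of simdlen 4 needs two calls and
   scores exact_log2 (2) * 4096 + 8192 == 12288 before the target and
   per-argument adjustments, so the former is chosen as BESTN.  */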
4245 for (i = 0; i < nargs; i++)
4247 if ((arginfo[i].dt == vect_constant_def
4248 || arginfo[i].dt == vect_external_def)
4249 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4251 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4252 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4253 slp_node);
4254 if (arginfo[i].vectype == NULL
4255 || !constant_multiple_p (bestn->simdclone->simdlen,
4256 simd_clone_subparts (arginfo[i].vectype)))
4257 return false;
4260 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4261 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4263 if (dump_enabled_p ())
4264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4265 "vector mask arguments are not supported.\n");
4266 return false;
4269 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4270 && bestn->simdclone->mask_mode == VOIDmode
4271 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4272 != simd_clone_subparts (arginfo[i].vectype)))
4274 /* FORNOW we only have partial support for vector-type masks that
4275 can't hold all of simdlen. */
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4278 vect_location,
4279 "in-branch vector clones are not yet"
4280 " supported for mismatched vector sizes.\n");
4281 return false;
4283 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4284 && bestn->simdclone->mask_mode != VOIDmode)
4286 /* FORNOW don't support integer-type masks. */
4287 if (dump_enabled_p ())
4288 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4289 vect_location,
4290 "in-branch vector clones are not yet"
4291 " supported for integer mask modes.\n");
4292 return false;
4296 fndecl = bestn->decl;
4297 nunits = bestn->simdclone->simdlen;
4298 ncopies = vector_unroll_factor (vf, nunits);
4300 /* If the function isn't const, only allow it in simd loops where the user
4301 has asserted that at least nunits consecutive iterations can be
4302 performed using SIMD instructions. */
4303 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4304 && gimple_vuse (stmt))
4305 return false;
4307 /* Sanity check: make sure that at least one copy of the vectorized stmt
4308 needs to be generated. */
4309 gcc_assert (ncopies >= 1);
4311 if (!vec_stmt) /* transformation not required. */
4313 /* When the original call is pure or const but the SIMD ABI dictates
4314 an aggregate return, we will have to use a virtual definition and
4315 in a loop eventually even need to add a virtual PHI. That is
4316 not straightforward, so allow this to be fixed up via renaming. */
4317 if (gimple_call_lhs (stmt)
4318 && !gimple_vdef (stmt)
4319 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4320 vinfo->any_known_not_updated_vssa = true;
4321 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4322 for (i = 0; i < nargs; i++)
4323 if ((bestn->simdclone->args[i].arg_type
4324 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4325 || (bestn->simdclone->args[i].arg_type
4326 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4328 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4329 + 1,
4330 true);
4331 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4332 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4333 ? size_type_node : TREE_TYPE (arginfo[i].op);
4334 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4335 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4336 tree sll = arginfo[i].simd_lane_linear
4337 ? boolean_true_node : boolean_false_node;
4338 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4340 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4341 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4342 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4343 dt, slp_node, cost_vec); */
4344 return true;
4347 /* Transform. */
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4352 /* Handle def. */
4353 scalar_dest = gimple_call_lhs (stmt);
4354 vec_dest = NULL_TREE;
4355 rtype = NULL_TREE;
4356 ratype = NULL_TREE;
4357 if (scalar_dest)
4359 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4360 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4361 if (TREE_CODE (rtype) == ARRAY_TYPE)
4363 ratype = rtype;
4364 rtype = TREE_TYPE (ratype);
4368 auto_vec<vec<tree> > vec_oprnds;
4369 auto_vec<unsigned> vec_oprnds_i;
4370 vec_oprnds.safe_grow_cleared (nargs, true);
4371 vec_oprnds_i.safe_grow_cleared (nargs, true);
4372 for (j = 0; j < ncopies; ++j)
4374 /* Build argument list for the vectorized call. */
4375 if (j == 0)
4376 vargs.create (nargs);
4377 else
4378 vargs.truncate (0);
4380 for (i = 0; i < nargs; i++)
4382 unsigned int k, l, m, o;
4383 tree atype;
4384 op = gimple_call_arg (stmt, i + arg_offset);
4385 switch (bestn->simdclone->args[i].arg_type)
4387 case SIMD_CLONE_ARG_TYPE_VECTOR:
4388 atype = bestn->simdclone->args[i].vector_type;
4389 o = vector_unroll_factor (nunits,
4390 simd_clone_subparts (atype));
4391 for (m = j * o; m < (j + 1) * o; m++)
4393 if (simd_clone_subparts (atype)
4394 < simd_clone_subparts (arginfo[i].vectype))
4396 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4397 k = (simd_clone_subparts (arginfo[i].vectype)
4398 / simd_clone_subparts (atype));
4399 gcc_assert ((k & (k - 1)) == 0);
4400 if (m == 0)
4402 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4403 ncopies * o / k, op,
4404 &vec_oprnds[i]);
4405 vec_oprnds_i[i] = 0;
4406 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4408 else
4410 vec_oprnd0 = arginfo[i].op;
4411 if ((m & (k - 1)) == 0)
4412 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4414 arginfo[i].op = vec_oprnd0;
4415 vec_oprnd0
4416 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4417 bitsize_int (prec),
4418 bitsize_int ((m & (k - 1)) * prec));
4419 gassign *new_stmt
4420 = gimple_build_assign (make_ssa_name (atype),
4421 vec_oprnd0);
4422 vect_finish_stmt_generation (vinfo, stmt_info,
4423 new_stmt, gsi);
4424 vargs.safe_push (gimple_assign_lhs (new_stmt));
4426 else
4428 k = (simd_clone_subparts (atype)
4429 / simd_clone_subparts (arginfo[i].vectype));
4430 gcc_assert ((k & (k - 1)) == 0);
4431 vec<constructor_elt, va_gc> *ctor_elts;
4432 if (k != 1)
4433 vec_alloc (ctor_elts, k);
4434 else
4435 ctor_elts = NULL;
4436 for (l = 0; l < k; l++)
4438 if (m == 0 && l == 0)
4440 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4441 k * o * ncopies,
4442 op,
4443 &vec_oprnds[i]);
4444 vec_oprnds_i[i] = 0;
4445 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4447 else
4448 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4449 arginfo[i].op = vec_oprnd0;
4450 if (k == 1)
4451 break;
4452 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4453 vec_oprnd0);
4455 if (k == 1)
4456 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4457 atype))
4459 vec_oprnd0
4460 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4461 gassign *new_stmt
4462 = gimple_build_assign (make_ssa_name (atype),
4463 vec_oprnd0);
4464 vect_finish_stmt_generation (vinfo, stmt_info,
4465 new_stmt, gsi);
4466 vargs.safe_push (gimple_assign_lhs (new_stmt));
4468 else
4469 vargs.safe_push (vec_oprnd0);
4470 else
4472 vec_oprnd0 = build_constructor (atype, ctor_elts);
4473 gassign *new_stmt
4474 = gimple_build_assign (make_ssa_name (atype),
4475 vec_oprnd0);
4476 vect_finish_stmt_generation (vinfo, stmt_info,
4477 new_stmt, gsi);
4478 vargs.safe_push (gimple_assign_lhs (new_stmt));
4482 break;
4483 case SIMD_CLONE_ARG_TYPE_MASK:
4484 atype = bestn->simdclone->args[i].vector_type;
4485 if (bestn->simdclone->mask_mode != VOIDmode)
4487 /* FORNOW: this is disabled above. */
4488 gcc_unreachable ();
4490 else
4492 tree elt_type = TREE_TYPE (atype);
4493 tree one = fold_convert (elt_type, integer_one_node);
4494 tree zero = fold_convert (elt_type, integer_zero_node);
4495 o = vector_unroll_factor (nunits,
4496 simd_clone_subparts (atype));
4497 for (m = j * o; m < (j + 1) * o; m++)
4499 if (simd_clone_subparts (atype)
4500 < simd_clone_subparts (arginfo[i].vectype))
4502 /* The mask type has fewer elements than simdlen. */
4504 /* FORNOW */
4505 gcc_unreachable ();
4507 else if (simd_clone_subparts (atype)
4508 == simd_clone_subparts (arginfo[i].vectype))
4510 /* The SIMD clone's mask type has the same number of
4511 elements as the caller's vector type. */
4512 if (m == 0)
4514 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4515 o * ncopies,
4516 op,
4517 &vec_oprnds[i]);
4518 vec_oprnds_i[i] = 0;
4520 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4521 vec_oprnd0
4522 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4523 build_vector_from_val (atype, one),
4524 build_vector_from_val (atype, zero));
4525 gassign *new_stmt
4526 = gimple_build_assign (make_ssa_name (atype),
4527 vec_oprnd0);
4528 vect_finish_stmt_generation (vinfo, stmt_info,
4529 new_stmt, gsi);
4530 vargs.safe_push (gimple_assign_lhs (new_stmt));
4532 else
4534 /* The mask type has more elements than simdlen. */
4536 /* FORNOW */
4537 gcc_unreachable ();
4541 break;
4542 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4543 vargs.safe_push (op);
4544 break;
4545 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4546 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4547 if (j == 0)
4549 gimple_seq stmts;
4550 arginfo[i].op
4551 = force_gimple_operand (unshare_expr (arginfo[i].op),
4552 &stmts, true, NULL_TREE);
4553 if (stmts != NULL)
4555 basic_block new_bb;
4556 edge pe = loop_preheader_edge (loop);
4557 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4558 gcc_assert (!new_bb);
4560 if (arginfo[i].simd_lane_linear)
4562 vargs.safe_push (arginfo[i].op);
4563 break;
4565 tree phi_res = copy_ssa_name (op);
4566 gphi *new_phi = create_phi_node (phi_res, loop->header);
4567 add_phi_arg (new_phi, arginfo[i].op,
4568 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4569 enum tree_code code
4570 = POINTER_TYPE_P (TREE_TYPE (op))
4571 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4572 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4573 ? sizetype : TREE_TYPE (op);
4574 poly_widest_int cst
4575 = wi::mul (bestn->simdclone->args[i].linear_step,
4576 ncopies * nunits);
4577 tree tcst = wide_int_to_tree (type, cst);
4578 tree phi_arg = copy_ssa_name (op);
4579 gassign *new_stmt
4580 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4581 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4582 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4583 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4584 UNKNOWN_LOCATION);
4585 arginfo[i].op = phi_res;
4586 vargs.safe_push (phi_res);
4588 else
4590 enum tree_code code
4591 = POINTER_TYPE_P (TREE_TYPE (op))
4592 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4593 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4594 ? sizetype : TREE_TYPE (op);
4595 poly_widest_int cst
4596 = wi::mul (bestn->simdclone->args[i].linear_step,
4597 j * nunits);
4598 tree tcst = wide_int_to_tree (type, cst);
4599 new_temp = make_ssa_name (TREE_TYPE (op));
4600 gassign *new_stmt
4601 = gimple_build_assign (new_temp, code,
4602 arginfo[i].op, tcst);
4603 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4604 vargs.safe_push (new_temp);
4606 break;
4607 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4610 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4611 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4612 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4613 default:
4614 gcc_unreachable ();
4618 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4619 if (vec_dest)
4621 gcc_assert (ratype
4622 || known_eq (simd_clone_subparts (rtype), nunits));
4623 if (ratype)
4624 new_temp = create_tmp_var (ratype);
4625 else if (useless_type_conversion_p (vectype, rtype))
4626 new_temp = make_ssa_name (vec_dest, new_call);
4627 else
4628 new_temp = make_ssa_name (rtype, new_call);
4629 gimple_call_set_lhs (new_call, new_temp);
4631 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4632 gimple *new_stmt = new_call;
4634 if (vec_dest)
4636 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4638 unsigned int k, l;
4639 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4640 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4641 k = vector_unroll_factor (nunits,
4642 simd_clone_subparts (vectype));
4643 gcc_assert ((k & (k - 1)) == 0);
4644 for (l = 0; l < k; l++)
4646 tree t;
4647 if (ratype)
4649 t = build_fold_addr_expr (new_temp);
4650 t = build2 (MEM_REF, vectype, t,
4651 build_int_cst (TREE_TYPE (t), l * bytes));
4653 else
4654 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4655 bitsize_int (prec), bitsize_int (l * prec));
4656 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4659 if (j == 0 && l == 0)
4660 *vec_stmt = new_stmt;
4661 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4664 if (ratype)
4665 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4666 continue;
4668 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4670 unsigned int k = (simd_clone_subparts (vectype)
4671 / simd_clone_subparts (rtype));
4672 gcc_assert ((k & (k - 1)) == 0);
4673 if ((j & (k - 1)) == 0)
4674 vec_alloc (ret_ctor_elts, k);
4675 if (ratype)
4677 unsigned int m, o;
4678 o = vector_unroll_factor (nunits,
4679 simd_clone_subparts (rtype));
4680 for (m = 0; m < o; m++)
4682 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4683 size_int (m), NULL_TREE, NULL_TREE);
4684 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4685 tem);
4686 vect_finish_stmt_generation (vinfo, stmt_info,
4687 new_stmt, gsi);
4688 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4689 gimple_assign_lhs (new_stmt));
4691 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4693 else
4694 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4695 if ((j & (k - 1)) != k - 1)
4696 continue;
4697 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4698 new_stmt
4699 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4700 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4702 if ((unsigned) j == k - 1)
4703 *vec_stmt = new_stmt;
4704 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4705 continue;
4707 else if (ratype)
4709 tree t = build_fold_addr_expr (new_temp);
4710 t = build2 (MEM_REF, vectype, t,
4711 build_int_cst (TREE_TYPE (t), 0));
4712 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4713 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4714 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4716 else if (!useless_type_conversion_p (vectype, rtype))
4718 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4719 new_stmt
4720 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4721 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4725 if (j == 0)
4726 *vec_stmt = new_stmt;
4727 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4730 for (i = 0; i < nargs; ++i)
4732 vec<tree> oprndsi = vec_oprnds[i];
4733 oprndsi.release ();
4735 vargs.release ();
4737 /* Mark the clone as no longer being a candidate for GC. */
4738 bestn->gc_candidate = false;
4740 /* The call in STMT might prevent it from being removed in DCE.
4741 We cannot remove it here, however, because of the way the SSA name
4742 it defines is mapped to the new definition. So just replace the
4743 RHS of the statement with something harmless. */
4745 if (slp_node)
4746 return true;
4748 gimple *new_stmt;
4749 if (scalar_dest)
4751 type = TREE_TYPE (scalar_dest);
4752 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4753 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4755 else
4756 new_stmt = gimple_build_nop ();
4757 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4758 unlink_stmt_vdef (stmt);
4760 return true;
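/* Illustrative example (not part of the original file): the scalar source
   below is the kind of call this function replaces.  With vf == 8 and an
   8-lane not-inbranch clone available, ncopies == 1 and each group of
   eight consecutive values of a[] and b[] is passed as whole vectors to
   the clone, whose mangled name encodes ISA, mask kind, simdlen and
   parameter kinds as laid down by the target's vector function ABI.  */

#pragma omp declare simd notinbranch
float scaled_sum (float a, float b);

void
example_call_simd_clone (float *out, const float *a, const float *b, int n)
{
#pragma omp simd
  for (int i = 0; i < n; ++i)
    out[i] = scaled_sum (a[i], b[i]);	/* replaced by a call to the clone */
}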
4764 /* Function vect_gen_widened_results_half
4766 Create a vector stmt whose code, number of operands, and result
4767 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4768 VEC_OPRND0 and VEC_OPRND1 (the latter is ignored for unary CODEs).
4769 The new vector stmt is to be inserted at GSI. STMT_INFO is the
4770 original scalar stmt that we are vectorizing. */
4773 static gimple *
4774 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4775 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4776 tree vec_dest, gimple_stmt_iterator *gsi,
4777 stmt_vec_info stmt_info)
4779 gimple *new_stmt;
4780 tree new_temp;
4782 /* Generate half of the widened result: */
4783 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4784 if (op_type != binary_op)
4785 vec_oprnd1 = NULL;
4786 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4787 new_temp = make_ssa_name (vec_dest, new_stmt);
4788 gimple_assign_set_lhs (new_stmt, new_temp);
4789 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4791 return new_stmt;
4795 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4796 For multi-step conversions store the resulting vectors and call the function
4797 recursively. */
4799 static void
4800 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4801 int multi_step_cvt,
4802 stmt_vec_info stmt_info,
4803 vec<tree> &vec_dsts,
4804 gimple_stmt_iterator *gsi,
4805 slp_tree slp_node, enum tree_code code)
4807 unsigned int i;
4808 tree vop0, vop1, new_tmp, vec_dest;
4810 vec_dest = vec_dsts.pop ();
4812 for (i = 0; i < vec_oprnds->length (); i += 2)
4814 /* Create demotion operation. */
4815 vop0 = (*vec_oprnds)[i];
4816 vop1 = (*vec_oprnds)[i + 1];
4817 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4818 new_tmp = make_ssa_name (vec_dest, new_stmt);
4819 gimple_assign_set_lhs (new_stmt, new_tmp);
4820 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4822 if (multi_step_cvt)
4823 /* Store the resulting vector for next recursive call. */
4824 (*vec_oprnds)[i/2] = new_tmp;
4825 else
4827 /* This is the last step of the conversion sequence. Store the
4828 vectors in SLP_NODE or in the vector info of the scalar statement
4829 (or in the STMT_VINFO_RELATED_STMT chain). */
4830 if (slp_node)
4831 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4832 else
4833 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4837 /* For multi-step demotion operations we first generate demotion operations
4838 from the source type to the intermediate types, and then combine the
4839 results (stored in VEC_OPRNDS) in a further demotion operation to the
4840 destination type. */
4841 if (multi_step_cvt)
4843 /* At each level of recursion we have half of the operands we had at the
4844 previous level. */
4845 vec_oprnds->truncate ((i+1)/2);
4846 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4847 multi_step_cvt - 1,
4848 stmt_info, vec_dsts, gsi,
4849 slp_node, VEC_PACK_TRUNC_EXPR);
4852 vec_dsts.quick_push (vec_dest);
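/* Worked example (illustrative, not part of the original file): narrowing
   int to signed char with 128-bit vectors is a two-step demotion
   (multi_step_cvt == 1): four V4SI operands are first packed pairwise
   into two V8HI vectors, which the recursive call then packs into one
   V16QI result via VEC_PACK_TRUNC_EXPR.  The scalar form being
   vectorized is simply:  */

void
example_narrow_int_to_char (signed char *dst, const int *src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = (signed char) src[i];
}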
4856 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4857 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4858 STMT_INFO. For multi-step conversions store the resulting vectors and
4859 call the function recursively. */
4861 static void
4862 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4863 vec<tree> *vec_oprnds0,
4864 vec<tree> *vec_oprnds1,
4865 stmt_vec_info stmt_info, tree vec_dest,
4866 gimple_stmt_iterator *gsi,
4867 enum tree_code code1,
4868 enum tree_code code2, int op_type)
4870 int i;
4871 tree vop0, vop1, new_tmp1, new_tmp2;
4872 gimple *new_stmt1, *new_stmt2;
4873 vec<tree> vec_tmp = vNULL;
4875 vec_tmp.create (vec_oprnds0->length () * 2);
4876 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4878 if (op_type == binary_op)
4879 vop1 = (*vec_oprnds1)[i];
4880 else
4881 vop1 = NULL_TREE;
4883 /* Generate the two halves of promotion operation. */
4884 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4885 op_type, vec_dest, gsi,
4886 stmt_info);
4887 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4888 op_type, vec_dest, gsi,
4889 stmt_info);
4890 if (is_gimple_call (new_stmt1))
4892 new_tmp1 = gimple_call_lhs (new_stmt1);
4893 new_tmp2 = gimple_call_lhs (new_stmt2);
4895 else
4897 new_tmp1 = gimple_assign_lhs (new_stmt1);
4898 new_tmp2 = gimple_assign_lhs (new_stmt2);
4901 /* Store the results for the next step. */
4902 vec_tmp.quick_push (new_tmp1);
4903 vec_tmp.quick_push (new_tmp2);
4906 vec_oprnds0->release ();
4907 *vec_oprnds0 = vec_tmp;
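/* Worked example (illustrative, not part of the original file): widening
   short to int with 128-bit vectors.  For every V8HI operand the two
   "half" statements built above produce the low and high V4SI parts
   (typically VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR), so VEC_OPRNDS0
   holds twice as many vectors afterwards.  The scalar form is:  */

void
example_widen_short_to_int (int *dst, const short *src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = src[i];
}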
4910 /* Create vectorized promotion stmts for widening stmts using only half the
4911 potential vector size for input. */
4912 static void
4913 vect_create_half_widening_stmts (vec_info *vinfo,
4914 vec<tree> *vec_oprnds0,
4915 vec<tree> *vec_oprnds1,
4916 stmt_vec_info stmt_info, tree vec_dest,
4917 gimple_stmt_iterator *gsi,
4918 enum tree_code code1,
4919 int op_type)
4921 int i;
4922 tree vop0, vop1;
4923 gimple *new_stmt1;
4924 gimple *new_stmt2;
4925 gimple *new_stmt3;
4926 vec<tree> vec_tmp = vNULL;
4928 vec_tmp.create (vec_oprnds0->length ());
4929 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4931 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4933 gcc_assert (op_type == binary_op);
4934 vop1 = (*vec_oprnds1)[i];
4936 /* Widen the first vector input. */
4937 out_type = TREE_TYPE (vec_dest);
4938 new_tmp1 = make_ssa_name (out_type);
4939 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4941 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4943 /* Widen the second vector input. */
4944 new_tmp2 = make_ssa_name (out_type);
4945 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4946 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4947 /* Perform the operation with both vector inputs widened. */
4948 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4950 else
4952 /* Perform the operation with the single vector input widened. */
4953 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4956 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4957 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4958 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4960 /* Store the results for the next step. */
4961 vec_tmp.quick_push (new_tmp3);
4964 vec_oprnds0->release ();
4965 *vec_oprnds0 = vec_tmp;
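/* Illustrative example (not part of the original file): the half-widening
   path applies when the input and output vector types have the same lane
   count, e.g. V4HI inputs and a V4SI result.  A widening add like the one
   below is then done by converting each input to V4SI with a NOP_EXPR and
   performing an ordinary V4SI addition, instead of unpacking into lo/hi
   halves.  */

void
example_widening_add (int *dst, const short *a, const short *b, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] = a[i] + b[i];	/* usually recognised as WIDEN_PLUS_EXPR */
}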
4969 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4970 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4971 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4972 Return true if STMT_INFO is vectorizable in this way. */
4974 static bool
4975 vectorizable_conversion (vec_info *vinfo,
4976 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4977 gimple **vec_stmt, slp_tree slp_node,
4978 stmt_vector_for_cost *cost_vec)
4980 tree vec_dest;
4981 tree scalar_dest;
4982 tree op0, op1 = NULL_TREE;
4983 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4984 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4985 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4986 tree new_temp;
4987 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4988 int ndts = 2;
4989 poly_uint64 nunits_in;
4990 poly_uint64 nunits_out;
4991 tree vectype_out, vectype_in;
4992 int ncopies, i;
4993 tree lhs_type, rhs_type;
4994 enum { NARROW, NONE, WIDEN } modifier;
4995 vec<tree> vec_oprnds0 = vNULL;
4996 vec<tree> vec_oprnds1 = vNULL;
4997 tree vop0;
4998 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4999 int multi_step_cvt = 0;
5000 vec<tree> interm_types = vNULL;
5001 tree intermediate_type, cvt_type = NULL_TREE;
5002 int op_type;
5003 unsigned short fltsz;
5005 /* Is STMT a vectorizable conversion? */
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5008 return false;
5010 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5011 && ! vec_stmt)
5012 return false;
5014 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5015 if (!stmt)
5016 return false;
5018 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5019 return false;
5021 code = gimple_assign_rhs_code (stmt);
5022 if (!CONVERT_EXPR_CODE_P (code)
5023 && code != FIX_TRUNC_EXPR
5024 && code != FLOAT_EXPR
5025 && code != WIDEN_PLUS_EXPR
5026 && code != WIDEN_MINUS_EXPR
5027 && code != WIDEN_MULT_EXPR
5028 && code != WIDEN_LSHIFT_EXPR)
5029 return false;
5031 bool widen_arith = (code == WIDEN_PLUS_EXPR
5032 || code == WIDEN_MINUS_EXPR
5033 || code == WIDEN_MULT_EXPR
5034 || code == WIDEN_LSHIFT_EXPR);
5035 op_type = TREE_CODE_LENGTH (code);
5037 /* Check types of lhs and rhs. */
5038 scalar_dest = gimple_assign_lhs (stmt);
5039 lhs_type = TREE_TYPE (scalar_dest);
5040 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5042 /* Check the operands of the operation. */
5043 slp_tree slp_op0, slp_op1 = NULL;
5044 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5045 0, &op0, &slp_op0, &dt[0], &vectype_in))
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5049 "use not simple.\n");
5050 return false;
5053 rhs_type = TREE_TYPE (op0);
5054 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5055 && !((INTEGRAL_TYPE_P (lhs_type)
5056 && INTEGRAL_TYPE_P (rhs_type))
5057 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5058 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5059 return false;
5061 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5062 && ((INTEGRAL_TYPE_P (lhs_type)
5063 && !type_has_mode_precision_p (lhs_type))
5064 || (INTEGRAL_TYPE_P (rhs_type)
5065 && !type_has_mode_precision_p (rhs_type))))
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "type conversion to/from bit-precision unsupported."
5070 "\n");
5071 return false;
5074 if (op_type == binary_op)
5076 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
5077 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
5079 op1 = gimple_assign_rhs2 (stmt);
5080 tree vectype1_in;
5081 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5082 &op1, &slp_op1, &dt[1], &vectype1_in))
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5086 "use not simple.\n");
5087 return false;
5089 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5090 OP1. */
5091 if (!vectype_in)
5092 vectype_in = vectype1_in;
5095 /* If op0 is an external or constant def, infer the vector type
5096 from the scalar type. */
5097 if (!vectype_in)
5098 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5099 if (vec_stmt)
5100 gcc_assert (vectype_in);
5101 if (!vectype_in)
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5105 "no vectype for scalar type %T\n", rhs_type);
5107 return false;
5110 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5111 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "can't convert between boolean and non "
5116 "boolean vectors %T\n", rhs_type);
5118 return false;
5121 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5122 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5123 if (known_eq (nunits_out, nunits_in))
5124 if (widen_arith)
5125 modifier = WIDEN;
5126 else
5127 modifier = NONE;
5128 else if (multiple_p (nunits_out, nunits_in))
5129 modifier = NARROW;
5130 else
5132 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5133 modifier = WIDEN;
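/* Worked examples (illustrative, not from the original source): with
   128-bit vectors, int -> float keeps the lane count (V4SI -> V4SF,
   modifier NONE); double -> float doubles it, so one V4SF result is
   built from two V2DF inputs (NARROW); float -> double halves it and
   each V4SF input is unpacked into two V2DF results (WIDEN).  */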
5136 /* Multiple types in SLP are handled by creating the appropriate number of
5137 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5138 case of SLP. */
5139 if (slp_node)
5140 ncopies = 1;
5141 else if (modifier == NARROW)
5142 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5143 else
5144 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5146 /* Sanity check: make sure that at least one copy of the vectorized stmt
5147 needs to be generated. */
5148 gcc_assert (ncopies >= 1);
5150 bool found_mode = false;
5151 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5152 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5153 opt_scalar_mode rhs_mode_iter;
5155 /* Supportable by target? */
5156 switch (modifier)
5158 case NONE:
5159 if (code != FIX_TRUNC_EXPR
5160 && code != FLOAT_EXPR
5161 && !CONVERT_EXPR_CODE_P (code))
5162 return false;
5163 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5164 break;
5165 /* FALLTHRU */
5166 unsupported:
5167 if (dump_enabled_p ())
5168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5169 "conversion not supported by target.\n");
5170 return false;
5172 case WIDEN:
5173 if (known_eq (nunits_in, nunits_out))
5175 if (!supportable_half_widening_operation (code, vectype_out,
5176 vectype_in, &code1))
5177 goto unsupported;
5178 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5179 break;
5181 if (supportable_widening_operation (vinfo, code, stmt_info,
5182 vectype_out, vectype_in, &code1,
5183 &code2, &multi_step_cvt,
5184 &interm_types))
5186 /* A binary widening operation can only be supported directly by the
5187 architecture. */
5188 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5189 break;
5192 if (code != FLOAT_EXPR
5193 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5194 goto unsupported;
5196 fltsz = GET_MODE_SIZE (lhs_mode);
5197 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5199 rhs_mode = rhs_mode_iter.require ();
5200 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5201 break;
5203 cvt_type
5204 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5205 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5206 if (cvt_type == NULL_TREE)
5207 goto unsupported;
5209 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5211 if (!supportable_convert_operation (code, vectype_out,
5212 cvt_type, &codecvt1))
5213 goto unsupported;
5215 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5216 vectype_out, cvt_type,
5217 &codecvt1, &codecvt2,
5218 &multi_step_cvt,
5219 &interm_types))
5220 continue;
5221 else
5222 gcc_assert (multi_step_cvt == 0);
5224 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5225 cvt_type,
5226 vectype_in, &code1, &code2,
5227 &multi_step_cvt, &interm_types))
5229 found_mode = true;
5230 break;
5234 if (!found_mode)
5235 goto unsupported;
5237 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5238 codecvt2 = ERROR_MARK;
5239 else
5241 multi_step_cvt++;
5242 interm_types.safe_push (cvt_type);
5243 cvt_type = NULL_TREE;
5245 break;
5247 case NARROW:
5248 gcc_assert (op_type == unary_op);
5249 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5250 &code1, &multi_step_cvt,
5251 &interm_types))
5252 break;
5254 if (code != FIX_TRUNC_EXPR
5255 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5256 goto unsupported;
5258 cvt_type
5259 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5260 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5261 if (cvt_type == NULL_TREE)
5262 goto unsupported;
5263 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5264 &codecvt1))
5265 goto unsupported;
5266 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5267 &code1, &multi_step_cvt,
5268 &interm_types))
5269 break;
5270 goto unsupported;
5272 default:
5273 gcc_unreachable ();
5276 if (!vec_stmt) /* transformation not required. */
5278 if (slp_node
5279 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5280 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5284 "incompatible vector types for invariants\n");
5285 return false;
5287 DUMP_VECT_SCOPE ("vectorizable_conversion");
5288 if (modifier == NONE)
5290 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5291 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5292 cost_vec);
5294 else if (modifier == NARROW)
5296 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5297 /* The final packing step produces one vector result per copy. */
5298 unsigned int nvectors
5299 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5300 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5301 multi_step_cvt, cost_vec,
5302 widen_arith);
5304 else
5306 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5307 /* The initial unpacking step produces two vector results
5308 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5309 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5310 unsigned int nvectors
5311 = (slp_node
5312 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5313 : ncopies * 2);
5314 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5315 multi_step_cvt, cost_vec,
5316 widen_arith);
5318 interm_types.release ();
5319 return true;
5322 /* Transform. */
5323 if (dump_enabled_p ())
5324 dump_printf_loc (MSG_NOTE, vect_location,
5325 "transform conversion. ncopies = %d.\n", ncopies);
5327 if (op_type == binary_op)
5329 if (CONSTANT_CLASS_P (op0))
5330 op0 = fold_convert (TREE_TYPE (op1), op0);
5331 else if (CONSTANT_CLASS_P (op1))
5332 op1 = fold_convert (TREE_TYPE (op0), op1);
5335 /* In case of multi-step conversion, we first generate conversion operations
5336 to the intermediate types, and then from those types to the final one.
5337 We create vector destinations for the intermediate types (TYPES) received
5338 from supportable_*_operation, and store them in the correct order
5339 for future use in vect_create_vectorized_*_stmts (). */
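 /* Illustrative sketch (example types only, target-dependent): converting a
 vector of chars to a vector of ints on a target that only widens to the
 adjacent mode takes two steps, e.g. V16QI -> V8HI -> V4SI; the
 intermediate V8HI vector type comes back in INTERM_TYPES and receives
 its own destination variable below. */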
5340 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5341 vec_dest = vect_create_destination_var (scalar_dest,
5342 (cvt_type && modifier == WIDEN)
5343 ? cvt_type : vectype_out);
5344 vec_dsts.quick_push (vec_dest);
5346 if (multi_step_cvt)
5348 for (i = interm_types.length () - 1;
5349 interm_types.iterate (i, &intermediate_type); i--)
5351 vec_dest = vect_create_destination_var (scalar_dest,
5352 intermediate_type);
5353 vec_dsts.quick_push (vec_dest);
5357 if (cvt_type)
5358 vec_dest = vect_create_destination_var (scalar_dest,
5359 modifier == WIDEN
5360 ? vectype_out : cvt_type);
5362 int ninputs = 1;
5363 if (!slp_node)
5365 if (modifier == WIDEN)
5367 else if (modifier == NARROW)
5369 if (multi_step_cvt)
5370 ninputs = vect_pow2 (multi_step_cvt);
5371 ninputs *= 2;
5375 switch (modifier)
5377 case NONE:
5378 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5379 op0, &vec_oprnds0);
5380 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5382 /* Arguments are ready, create the new vector stmt. */
5383 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5384 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5385 new_temp = make_ssa_name (vec_dest, new_stmt);
5386 gimple_assign_set_lhs (new_stmt, new_temp);
5387 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5389 if (slp_node)
5390 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5391 else
5392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5394 break;
5396 case WIDEN:
5397 /* In case the vectorization factor (VF) is bigger than the number
5398 of elements that we can fit in a vectype (nunits), we have to
5399 generate more than one vector stmt, i.e. we need to "unroll"
5400 the vector stmt by a factor of VF/nunits. */
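 /* Example with illustrative numbers: with VF = 8 and 4-element input
 vectors, ncopies = 2; each widening step then turns one input vector
 into a lo/hi pair of wider result vectors (e.g. via
 VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR). */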
5401 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5402 op0, &vec_oprnds0,
5403 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5404 &vec_oprnds1);
5405 if (code == WIDEN_LSHIFT_EXPR)
5407 int oprnds_size = vec_oprnds0.length ();
5408 vec_oprnds1.create (oprnds_size);
5409 for (i = 0; i < oprnds_size; ++i)
5410 vec_oprnds1.quick_push (op1);
5412 /* Arguments are ready. Create the new vector stmts. */
5413 for (i = multi_step_cvt; i >= 0; i--)
5415 tree this_dest = vec_dsts[i];
5416 enum tree_code c1 = code1, c2 = code2;
5417 if (i == 0 && codecvt2 != ERROR_MARK)
5419 c1 = codecvt1;
5420 c2 = codecvt2;
5422 if (known_eq (nunits_out, nunits_in))
5423 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5424 &vec_oprnds1, stmt_info,
5425 this_dest, gsi,
5426 c1, op_type);
5427 else
5428 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5429 &vec_oprnds1, stmt_info,
5430 this_dest, gsi,
5431 c1, c2, op_type);
5434 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5436 gimple *new_stmt;
5437 if (cvt_type)
5439 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5440 new_temp = make_ssa_name (vec_dest);
5441 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5442 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5444 else
5445 new_stmt = SSA_NAME_DEF_STMT (vop0);
5447 if (slp_node)
5448 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5449 else
5450 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5452 break;
5454 case NARROW:
5455 /* In case the vectorization factor (VF) is bigger than the number
5456 of elements that we can fit in a vectype (nunits), we have to
5457 generate more than one vector stmt, i.e. we need to "unroll"
5458 the vector stmt by a factor of VF/nunits. */
5459 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5460 op0, &vec_oprnds0);
5461 /* Arguments are ready. Create the new vector stmts. */
5462 if (cvt_type)
5463 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5465 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5466 new_temp = make_ssa_name (vec_dest);
5467 gassign *new_stmt
5468 = gimple_build_assign (new_temp, codecvt1, vop0);
5469 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5470 vec_oprnds0[i] = new_temp;
5473 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5474 multi_step_cvt,
5475 stmt_info, vec_dsts, gsi,
5476 slp_node, code1);
5477 break;
5479 if (!slp_node)
5480 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5482 vec_oprnds0.release ();
5483 vec_oprnds1.release ();
5484 interm_types.release ();
5486 return true;
5489 /* Return true if we can assume from the scalar form of STMT_INFO that
5490 neither the scalar nor the vector forms will generate code. STMT_INFO
5491 is known not to involve a data reference. */
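/* For instance (illustrative, not exhaustive): a cast between int and
 unsigned int, or a VIEW_CONVERT_EXPR between same-sized types, is a
 no-op in both the scalar and the vector form. */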
5493 bool
5494 vect_nop_conversion_p (stmt_vec_info stmt_info)
5496 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5497 if (!stmt)
5498 return false;
5500 tree lhs = gimple_assign_lhs (stmt);
5501 tree_code code = gimple_assign_rhs_code (stmt);
5502 tree rhs = gimple_assign_rhs1 (stmt);
5504 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5505 return true;
5507 if (CONVERT_EXPR_CODE_P (code))
5508 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5510 return false;
5513 /* Function vectorizable_assignment.
5515 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5516 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5517 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5518 Return true if STMT_INFO is vectorizable in this way. */
5520 static bool
5521 vectorizable_assignment (vec_info *vinfo,
5522 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5523 gimple **vec_stmt, slp_tree slp_node,
5524 stmt_vector_for_cost *cost_vec)
5526 tree vec_dest;
5527 tree scalar_dest;
5528 tree op;
5529 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5530 tree new_temp;
5531 enum vect_def_type dt[1] = {vect_unknown_def_type};
5532 int ndts = 1;
5533 int ncopies;
5534 int i;
5535 vec<tree> vec_oprnds = vNULL;
5536 tree vop;
5537 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5538 enum tree_code code;
5539 tree vectype_in;
5541 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5542 return false;
5544 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5545 && ! vec_stmt)
5546 return false;
5548 /* Is vectorizable assignment? */
5549 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5550 if (!stmt)
5551 return false;
5553 scalar_dest = gimple_assign_lhs (stmt);
5554 if (TREE_CODE (scalar_dest) != SSA_NAME)
5555 return false;
5557 if (STMT_VINFO_DATA_REF (stmt_info))
5558 return false;
5560 code = gimple_assign_rhs_code (stmt);
5561 if (!(gimple_assign_single_p (stmt)
5562 || code == PAREN_EXPR
5563 || CONVERT_EXPR_CODE_P (code)))
5564 return false;
5566 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5567 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5569 /* Multiple types in SLP are handled by creating the appropriate number of
5570 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5571 case of SLP. */
5572 if (slp_node)
5573 ncopies = 1;
5574 else
5575 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5577 gcc_assert (ncopies >= 1);
5579 slp_tree slp_op;
5580 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5581 &dt[0], &vectype_in))
5583 if (dump_enabled_p ())
5584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5585 "use not simple.\n");
5586 return false;
5588 if (!vectype_in)
5589 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5591 /* We can handle NOP_EXPR conversions that do not change the number
5592 of elements or the vector size. */
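 /* For example (illustrative): a conversion from int to short changes
 the number of elements per vector and is therefore rejected here;
 such conversions are the business of vectorizable_conversion. */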
5593 if ((CONVERT_EXPR_CODE_P (code)
5594 || code == VIEW_CONVERT_EXPR)
5595 && (!vectype_in
5596 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5597 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5598 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5599 return false;
5601 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5605 "can't convert between boolean and non "
5606 "boolean vectors %T\n", TREE_TYPE (op));
5608 return false;
5611 /* We do not handle bit-precision changes. */
5612 if ((CONVERT_EXPR_CODE_P (code)
5613 || code == VIEW_CONVERT_EXPR)
5614 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5615 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5616 || !type_has_mode_precision_p (TREE_TYPE (op)))
5617 /* But a conversion that does not change the bit-pattern is ok. */
5618 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5619 > TYPE_PRECISION (TREE_TYPE (op)))
5620 && TYPE_UNSIGNED (TREE_TYPE (op))))
5622 if (dump_enabled_p ())
5623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5624 "type conversion to/from bit-precision "
5625 "unsupported.\n");
5626 return false;
5629 if (!vec_stmt) /* transformation not required. */
5631 if (slp_node
5632 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5636 "incompatible vector types for invariants\n");
5637 return false;
5639 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5640 DUMP_VECT_SCOPE ("vectorizable_assignment");
5641 if (!vect_nop_conversion_p (stmt_info))
5642 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5643 cost_vec);
5644 return true;
5647 /* Transform. */
5648 if (dump_enabled_p ())
5649 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5651 /* Handle def. */
5652 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5654 /* Handle use. */
5655 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5657 /* Arguments are ready. Create the new vector stmt. */
5658 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5660 if (CONVERT_EXPR_CODE_P (code)
5661 || code == VIEW_CONVERT_EXPR)
5662 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5663 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5664 new_temp = make_ssa_name (vec_dest, new_stmt);
5665 gimple_assign_set_lhs (new_stmt, new_temp);
5666 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5667 if (slp_node)
5668 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5669 else
5670 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5672 if (!slp_node)
5673 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5675 vec_oprnds.release ();
5676 return true;
5680 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5681 either as shift by a scalar or by a vector. */
5683 bool
5684 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5687 machine_mode vec_mode;
5688 optab optab;
5689 int icode;
5690 tree vectype;
5692 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5693 if (!vectype)
5694 return false;
5696 optab = optab_for_tree_code (code, vectype, optab_scalar);
5697 if (!optab
5698 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5700 optab = optab_for_tree_code (code, vectype, optab_vector);
5701 if (!optab
5702 || (optab_handler (optab, TYPE_MODE (vectype))
5703 == CODE_FOR_nothing))
5704 return false;
5707 vec_mode = TYPE_MODE (vectype);
5708 icode = (int) optab_handler (optab, vec_mode);
5709 if (icode == CODE_FOR_nothing)
5710 return false;
5712 return true;
5716 /* Function vectorizable_shift.
5718 Check if STMT_INFO performs a shift operation that can be vectorized.
5719 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5720 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5721 Return true if STMT_INFO is vectorizable in this way. */
5723 static bool
5724 vectorizable_shift (vec_info *vinfo,
5725 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5726 gimple **vec_stmt, slp_tree slp_node,
5727 stmt_vector_for_cost *cost_vec)
5729 tree vec_dest;
5730 tree scalar_dest;
5731 tree op0, op1 = NULL;
5732 tree vec_oprnd1 = NULL_TREE;
5733 tree vectype;
5734 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5735 enum tree_code code;
5736 machine_mode vec_mode;
5737 tree new_temp;
5738 optab optab;
5739 int icode;
5740 machine_mode optab_op2_mode;
5741 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5742 int ndts = 2;
5743 poly_uint64 nunits_in;
5744 poly_uint64 nunits_out;
5745 tree vectype_out;
5746 tree op1_vectype;
5747 int ncopies;
5748 int i;
5749 vec<tree> vec_oprnds0 = vNULL;
5750 vec<tree> vec_oprnds1 = vNULL;
5751 tree vop0, vop1;
5752 unsigned int k;
5753 bool scalar_shift_arg = true;
5754 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5755 bool incompatible_op1_vectype_p = false;
5757 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5758 return false;
5760 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5761 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5762 && ! vec_stmt)
5763 return false;
5765 /* Is STMT a vectorizable binary/unary operation? */
5766 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5767 if (!stmt)
5768 return false;
5770 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5771 return false;
5773 code = gimple_assign_rhs_code (stmt);
5775 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5776 || code == RROTATE_EXPR))
5777 return false;
5779 scalar_dest = gimple_assign_lhs (stmt);
5780 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5781 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785 "bit-precision shifts not supported.\n");
5786 return false;
5789 slp_tree slp_op0;
5790 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5791 0, &op0, &slp_op0, &dt[0], &vectype))
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5795 "use not simple.\n");
5796 return false;
5798 /* If op0 is an external or constant def, infer the vector type
5799 from the scalar type. */
5800 if (!vectype)
5801 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5802 if (vec_stmt)
5803 gcc_assert (vectype);
5804 if (!vectype)
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808 "no vectype for scalar type\n");
5809 return false;
5812 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5813 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5814 if (maybe_ne (nunits_out, nunits_in))
5815 return false;
5817 stmt_vec_info op1_def_stmt_info;
5818 slp_tree slp_op1;
5819 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5820 &dt[1], &op1_vectype, &op1_def_stmt_info))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "use not simple.\n");
5825 return false;
5828 /* Multiple types in SLP are handled by creating the appropriate number of
5829 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5830 case of SLP. */
5831 if (slp_node)
5832 ncopies = 1;
5833 else
5834 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5836 gcc_assert (ncopies >= 1);
5838 /* Determine whether the shift amount is a vector or a scalar. If the
5839 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5841 if ((dt[1] == vect_internal_def
5842 || dt[1] == vect_induction_def
5843 || dt[1] == vect_nested_cycle)
5844 && !slp_node)
5845 scalar_shift_arg = false;
5846 else if (dt[1] == vect_constant_def
5847 || dt[1] == vect_external_def
5848 || dt[1] == vect_internal_def)
5850 /* In SLP, we need to check whether the shift count is the same for
5851 all scalar stmts; in loops, if it is a constant or invariant, it
5852 is always a scalar shift. */
5853 if (slp_node)
5855 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5856 stmt_vec_info slpstmt_info;
5858 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5860 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5861 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5862 scalar_shift_arg = false;
5865 /* For internal SLP defs we have to make sure we see scalar stmts
5866 for all vector elements.
5867 ??? For different vectors we could resort to a different
5868 scalar shift operand but code-generation below simply always
5869 takes the first. */
5870 if (dt[1] == vect_internal_def
5871 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5872 stmts.length ()))
5873 scalar_shift_arg = false;
5876 /* If the shift amount is computed by a pattern stmt we cannot
5877 use the scalar amount directly, so give up and use a vector
5878 shift. */
5879 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5880 scalar_shift_arg = false;
5882 else
5884 if (dump_enabled_p ())
5885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5886 "operand mode requires invariant argument.\n");
5887 return false;
5890 /* Vector shifted by vector. */
5891 bool was_scalar_shift_arg = scalar_shift_arg;
5892 if (!scalar_shift_arg)
5894 optab = optab_for_tree_code (code, vectype, optab_vector);
5895 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_NOTE, vect_location,
5897 "vector/vector shift/rotate found.\n");
5899 if (!op1_vectype)
5900 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5901 slp_op1);
5902 incompatible_op1_vectype_p
5903 = (op1_vectype == NULL_TREE
5904 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5905 TYPE_VECTOR_SUBPARTS (vectype))
5906 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5907 if (incompatible_op1_vectype_p
5908 && (!slp_node
5909 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5910 || slp_op1->refcnt != 1))
5912 if (dump_enabled_p ())
5913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5914 "unusable type for last operand in"
5915 " vector/vector shift/rotate.\n");
5916 return false;
5919 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5920 whether it has a vector-shifted-by-vector insn. */
5921 else
5923 optab = optab_for_tree_code (code, vectype, optab_scalar);
5924 if (optab
5925 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5927 if (dump_enabled_p ())
5928 dump_printf_loc (MSG_NOTE, vect_location,
5929 "vector/scalar shift/rotate found.\n");
5931 else
5933 optab = optab_for_tree_code (code, vectype, optab_vector);
5934 if (optab
5935 && (optab_handler (optab, TYPE_MODE (vectype))
5936 != CODE_FOR_nothing))
5938 scalar_shift_arg = false;
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE, vect_location,
5942 "vector/vector shift/rotate found.\n");
5944 if (!op1_vectype)
5945 op1_vectype = get_vectype_for_scalar_type (vinfo,
5946 TREE_TYPE (op1),
5947 slp_op1);
5949 /* Unlike the other binary operators, shifts/rotates have
5950 the rhs being int, instead of the same type as the lhs,
5951 so make sure the scalar is the right type if we are
5952 dealing with vectors of long long/long/short/char. */
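 /* Illustrative example: for "long long x; ... x << n" with an int n,
 the scalar shift count has to be converted to long long before an
 invariant shift-amount vector (e.g. V2DI) can be built from it. */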
5953 incompatible_op1_vectype_p
5954 = (!op1_vectype
5955 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5956 TREE_TYPE (op1)));
5957 if (incompatible_op1_vectype_p
5958 && dt[1] == vect_internal_def)
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5962 "unusable type for last operand in"
5963 " vector/vector shift/rotate.\n");
5964 return false;
5970 /* Supportable by target? */
5971 if (!optab)
5973 if (dump_enabled_p ())
5974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5975 "no optab.\n");
5976 return false;
5978 vec_mode = TYPE_MODE (vectype);
5979 icode = (int) optab_handler (optab, vec_mode);
5980 if (icode == CODE_FOR_nothing)
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "op not supported by target.\n");
5985 return false;
5987 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5988 if (vect_emulated_vector_p (vectype))
5989 return false;
5991 if (!vec_stmt) /* transformation not required. */
5993 if (slp_node
5994 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5995 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5996 && (!incompatible_op1_vectype_p
5997 || dt[1] == vect_constant_def)
5998 && !vect_maybe_update_slp_op_vectype
5999 (slp_op1,
6000 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6002 if (dump_enabled_p ())
6003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6004 "incompatible vector types for invariants\n");
6005 return false;
6007 /* Now adjust the constant shift amount in place. */
6008 if (slp_node
6009 && incompatible_op1_vectype_p
6010 && dt[1] == vect_constant_def)
6012 for (unsigned i = 0;
6013 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6015 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6016 = fold_convert (TREE_TYPE (vectype),
6017 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6018 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6019 == INTEGER_CST));
6022 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6023 DUMP_VECT_SCOPE ("vectorizable_shift");
6024 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6025 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6026 return true;
6029 /* Transform. */
6031 if (dump_enabled_p ())
6032 dump_printf_loc (MSG_NOTE, vect_location,
6033 "transform binary/unary operation.\n");
6035 if (incompatible_op1_vectype_p && !slp_node)
6037 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6038 op1 = fold_convert (TREE_TYPE (vectype), op1);
6039 if (dt[1] != vect_constant_def)
6040 op1 = vect_init_vector (vinfo, stmt_info, op1,
6041 TREE_TYPE (vectype), NULL);
6044 /* Handle def. */
6045 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6047 if (scalar_shift_arg && dt[1] != vect_internal_def)
6049 /* Vector shl and shr insn patterns can be defined with scalar
6050 operand 2 (shift operand). In this case, use constant or loop
6051 invariant op1 directly, without extending it to vector mode
6052 first. */
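 /* Illustrative case: a target whose vector shift pattern takes the
 count as a scalar (or immediate) operand; the original scalar OP1 is
 then pushed once per copy below instead of being broadcast into a
 vector. */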
6053 optab_op2_mode = insn_data[icode].operand[2].mode;
6054 if (!VECTOR_MODE_P (optab_op2_mode))
6056 if (dump_enabled_p ())
6057 dump_printf_loc (MSG_NOTE, vect_location,
6058 "operand 1 using scalar mode.\n");
6059 vec_oprnd1 = op1;
6060 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6061 vec_oprnds1.quick_push (vec_oprnd1);
6062 /* Store vec_oprnd1 for every vector stmt to be created.
6063 We check during the analysis that all the shift arguments
6064 are the same.
6065 TODO: Allow different constants for different vector
6066 stmts generated for an SLP instance. */
6067 for (k = 0;
6068 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6069 vec_oprnds1.quick_push (vec_oprnd1);
6072 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6074 if (was_scalar_shift_arg)
6076 /* If the argument was the same in all lanes, create
6077 the correctly typed vector shift amount directly. */
6078 op1 = fold_convert (TREE_TYPE (vectype), op1);
6079 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6080 !loop_vinfo ? gsi : NULL);
6081 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6082 !loop_vinfo ? gsi : NULL);
6083 vec_oprnds1.create (slp_node->vec_stmts_size);
6084 for (k = 0; k < slp_node->vec_stmts_size; k++)
6085 vec_oprnds1.quick_push (vec_oprnd1);
6087 else if (dt[1] == vect_constant_def)
6088 /* The constant shift amount has been adjusted in place. */
6090 else
6091 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6094 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6095 (a special case for certain kinds of vector shifts); otherwise,
6096 operand 1 should be of a vector type (the usual case). */
6097 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6098 op0, &vec_oprnds0,
6099 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6101 /* Arguments are ready. Create the new vector stmt. */
6102 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6104 /* For internal defs where we need to use a scalar shift arg,
6105 extract the first lane. */
6106 if (scalar_shift_arg && dt[1] == vect_internal_def)
6108 vop1 = vec_oprnds1[0];
6109 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6110 gassign *new_stmt
6111 = gimple_build_assign (new_temp,
6112 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6113 vop1,
6114 TYPE_SIZE (TREE_TYPE (new_temp)),
6115 bitsize_zero_node));
6116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6117 vop1 = new_temp;
6119 else
6120 vop1 = vec_oprnds1[i];
6121 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6122 new_temp = make_ssa_name (vec_dest, new_stmt);
6123 gimple_assign_set_lhs (new_stmt, new_temp);
6124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6125 if (slp_node)
6126 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6127 else
6128 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6131 if (!slp_node)
6132 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6134 vec_oprnds0.release ();
6135 vec_oprnds1.release ();
6137 return true;
6140 /* Function vectorizable_operation.
6142 Check if STMT_INFO performs a binary, unary or ternary operation that can
6143 be vectorized.
6144 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6145 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6146 Return true if STMT_INFO is vectorizable in this way. */
6148 static bool
6149 vectorizable_operation (vec_info *vinfo,
6150 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6151 gimple **vec_stmt, slp_tree slp_node,
6152 stmt_vector_for_cost *cost_vec)
6154 tree vec_dest;
6155 tree scalar_dest;
6156 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6157 tree vectype;
6158 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6159 enum tree_code code, orig_code;
6160 machine_mode vec_mode;
6161 tree new_temp;
6162 int op_type;
6163 optab optab;
6164 bool target_support_p;
6165 enum vect_def_type dt[3]
6166 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6167 int ndts = 3;
6168 poly_uint64 nunits_in;
6169 poly_uint64 nunits_out;
6170 tree vectype_out;
6171 int ncopies, vec_num;
6172 int i;
6173 vec<tree> vec_oprnds0 = vNULL;
6174 vec<tree> vec_oprnds1 = vNULL;
6175 vec<tree> vec_oprnds2 = vNULL;
6176 tree vop0, vop1, vop2;
6177 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6179 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6180 return false;
6182 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6183 && ! vec_stmt)
6184 return false;
6186 /* Is STMT a vectorizable binary/unary operation? */
6187 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6188 if (!stmt)
6189 return false;
6191 /* Loads and stores are handled in vectorizable_{load,store}. */
6192 if (STMT_VINFO_DATA_REF (stmt_info))
6193 return false;
6195 orig_code = code = gimple_assign_rhs_code (stmt);
6197 /* Shifts are handled in vectorizable_shift. */
6198 if (code == LSHIFT_EXPR
6199 || code == RSHIFT_EXPR
6200 || code == LROTATE_EXPR
6201 || code == RROTATE_EXPR)
6202 return false;
6204 /* Comparisons are handled in vectorizable_comparison. */
6205 if (TREE_CODE_CLASS (code) == tcc_comparison)
6206 return false;
6208 /* Conditions are handled in vectorizable_condition. */
6209 if (code == COND_EXPR)
6210 return false;
6212 /* For pointer addition and subtraction, we should use the normal
6213 plus and minus for the vector operation. */
6214 if (code == POINTER_PLUS_EXPR)
6215 code = PLUS_EXPR;
6216 if (code == POINTER_DIFF_EXPR)
6217 code = MINUS_EXPR;
6219 /* Support only unary or binary operations. */
6220 op_type = TREE_CODE_LENGTH (code);
6221 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6225 "num. args = %d (not unary/binary/ternary op).\n",
6226 op_type);
6227 return false;
6230 scalar_dest = gimple_assign_lhs (stmt);
6231 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6233 /* Most operations cannot handle bit-precision types without extra
6234 truncations. */
6235 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6236 if (!mask_op_p
6237 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6238 /* Exception are bitwise binary operations. */
6239 && code != BIT_IOR_EXPR
6240 && code != BIT_XOR_EXPR
6241 && code != BIT_AND_EXPR)
6243 if (dump_enabled_p ())
6244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6245 "bit-precision arithmetic not supported.\n");
6246 return false;
6249 slp_tree slp_op0;
6250 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6251 0, &op0, &slp_op0, &dt[0], &vectype))
6253 if (dump_enabled_p ())
6254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6255 "use not simple.\n");
6256 return false;
6258 bool is_invariant = (dt[0] == vect_external_def
6259 || dt[0] == vect_constant_def);
6260 /* If op0 is an external or constant def, infer the vector type
6261 from the scalar type. */
6262 if (!vectype)
6264 /* For a boolean type we cannot determine the vectype from an
6265 invariant value (we don't know whether it is a vector
6266 of booleans or a vector of integers). We use the output
6267 vectype because operations on booleans don't change
6268 the type. */
6269 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6271 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6273 if (dump_enabled_p ())
6274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6275 "not supported operation on bool value.\n");
6276 return false;
6278 vectype = vectype_out;
6280 else
6281 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6282 slp_node);
6284 if (vec_stmt)
6285 gcc_assert (vectype);
6286 if (!vectype)
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "no vectype for scalar type %T\n",
6291 TREE_TYPE (op0));
6293 return false;
6296 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6297 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6298 if (maybe_ne (nunits_out, nunits_in))
6299 return false;
6301 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6302 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6303 if (op_type == binary_op || op_type == ternary_op)
6305 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6306 1, &op1, &slp_op1, &dt[1], &vectype2))
6308 if (dump_enabled_p ())
6309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6310 "use not simple.\n");
6311 return false;
6313 is_invariant &= (dt[1] == vect_external_def
6314 || dt[1] == vect_constant_def);
6315 if (vectype2
6316 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6317 return false;
6319 if (op_type == ternary_op)
6321 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6322 2, &op2, &slp_op2, &dt[2], &vectype3))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6326 "use not simple.\n");
6327 return false;
6329 is_invariant &= (dt[2] == vect_external_def
6330 || dt[2] == vect_constant_def);
6331 if (vectype3
6332 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6333 return false;
6336 /* Multiple types in SLP are handled by creating the appropriate number of
6337 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6338 case of SLP. */
6339 if (slp_node)
6341 ncopies = 1;
6342 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6344 else
6346 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6347 vec_num = 1;
6350 gcc_assert (ncopies >= 1);
6352 /* Reject attempts to combine mask types with nonmask types, e.g. if
6353 we have an AND between a (nonmask) boolean loaded from memory and
6354 a (mask) boolean result of a comparison.
6356 TODO: We could easily fix these cases up using pattern statements. */
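 /* A sketch of the rejected situation (hypothetical source):
 _Bool *p; ... flag = p[i] & (a[i] < b[i]);
 the load of p[i] gives a nonmask boolean vector of 0/1 bytes while
 the comparison gives a mask vector, and we refuse to AND the two
 without a pattern fixing up the types. */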
6357 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6358 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6359 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6363 "mixed mask and nonmask vector types\n");
6364 return false;
6367 /* Supportable by target? */
6369 vec_mode = TYPE_MODE (vectype);
6370 if (code == MULT_HIGHPART_EXPR)
6371 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6372 else
6374 optab = optab_for_tree_code (code, vectype, optab_default);
6375 if (!optab)
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6379 "no optab.\n");
6380 return false;
6382 target_support_p = (optab_handler (optab, vec_mode)
6383 != CODE_FOR_nothing);
6386 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6387 if (!target_support_p || using_emulated_vectors_p)
6389 if (dump_enabled_p ())
6390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6391 "op not supported by target.\n");
6392 /* When vec_mode is not a vector mode and the ops we verified as
6393 not needing lowering (like AND) are natively supported, let
6394 those through even when the mode isn't word_mode. For ops
6395 we do have to lower, the lowering code assumes we are
6396 dealing with word_mode. */
6397 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6398 || !target_support_p)
6399 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6400 /* Check only during analysis. */
6401 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6403 if (dump_enabled_p ())
6404 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6405 return false;
6407 if (dump_enabled_p ())
6408 dump_printf_loc (MSG_NOTE, vect_location,
6409 "proceeding using word mode.\n");
6410 using_emulated_vectors_p = true;
6413 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6414 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6415 internal_fn cond_fn = get_conditional_internal_fn (code);
6417 /* If operating on inactive elements could generate spurious traps,
6418 we need to restrict the operation to active lanes. Note that this
6419 specifically doesn't apply to unhoisted invariants, since they
6420 operate on the same value for every lane.
6422 Similarly, if this operation is part of a reduction, a fully-masked
6423 loop should only change the active lanes of the reduction chain,
6424 keeping the inactive lanes as-is. */
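 /* Example (illustrative): a[i] / b[i] in a fully-masked loop could
 trap on a zero divisor in an inactive lane, so it is emitted as a
 conditional internal function (e.g. .COND_DIV) under the loop mask;
 an unhoisted invariant like x / y computes the same value in every
 lane and needs no such protection. */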
6425 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6426 || reduc_idx >= 0);
6428 if (!vec_stmt) /* transformation not required. */
6430 if (loop_vinfo
6431 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6432 && mask_out_inactive)
6434 if (cond_fn == IFN_LAST
6435 || !direct_internal_fn_supported_p (cond_fn, vectype,
6436 OPTIMIZE_FOR_SPEED))
6438 if (dump_enabled_p ())
6439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6440 "can't use a fully-masked loop because no"
6441 " conditional operation is available.\n");
6442 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6444 else
6445 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6446 vectype, NULL);
6449 /* Put types on constant and invariant SLP children. */
6450 if (slp_node
6451 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6452 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6453 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6455 if (dump_enabled_p ())
6456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6457 "incompatible vector types for invariants\n");
6458 return false;
6461 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6462 DUMP_VECT_SCOPE ("vectorizable_operation");
6463 vect_model_simple_cost (vinfo, stmt_info,
6464 ncopies, dt, ndts, slp_node, cost_vec);
6465 if (using_emulated_vectors_p)
6467 /* The above vect_model_simple_cost call handles constants
6468 in the prologue and (mis-)costs one of the stmts as
6469 vector stmt. See below for the actual lowering that will
6470 be applied. */
6471 unsigned n
6472 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6473 switch (code)
6475 case PLUS_EXPR:
6476 n *= 5;
6477 break;
6478 case MINUS_EXPR:
6479 n *= 6;
6480 break;
6481 case NEGATE_EXPR:
6482 n *= 4;
6483 break;
6484 default:
6485 /* Bit operations do not have extra cost and are accounted
6486 as vector stmt by vect_model_simple_cost. */
6487 n = 0;
6488 break;
6490 if (n != 0)
6492 /* We also need to materialize two large constants. */
6493 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6494 0, vect_prologue);
6495 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6496 0, vect_body);
6499 return true;
6502 /* Transform. */
6504 if (dump_enabled_p ())
6505 dump_printf_loc (MSG_NOTE, vect_location,
6506 "transform binary/unary operation.\n");
6508 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6510 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6511 vectors with unsigned elements, but the result is signed. So we
6512 need to compute the MINUS_EXPR into a vectype temporary and
6513 VIEW_CONVERT_EXPR it into the final vectype_out result. */
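 /* Illustrative LP64 example: for a V2DI pointer difference the
 MINUS_EXPR is computed in vector(2) unsigned long (VECTYPE) and the
 result is then VIEW_CONVERTed to vector(2) long (VECTYPE_OUT). */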
6514 tree vec_cvt_dest = NULL_TREE;
6515 if (orig_code == POINTER_DIFF_EXPR)
6517 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6518 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6520 /* Handle def. */
6521 else
6522 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6524 /* In case the vectorization factor (VF) is bigger than the number
6525 of elements that we can fit in a vectype (nunits), we have to generate
6526 more than one vector stmt - i.e - we need to "unroll" the
6527 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6528 from one copy of the vector stmt to the next, in the field
6529 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6530 stages to find the correct vector defs to be used when vectorizing
6531 stmts that use the defs of the current stmt. The example below
6532 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6533 we need to create 4 vectorized stmts):
6535 before vectorization:
6536 RELATED_STMT VEC_STMT
6537 S1: x = memref - -
6538 S2: z = x + 1 - -
6540 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6541 there):
6542 RELATED_STMT VEC_STMT
6543 VS1_0: vx0 = memref0 VS1_1 -
6544 VS1_1: vx1 = memref1 VS1_2 -
6545 VS1_2: vx2 = memref2 VS1_3 -
6546 VS1_3: vx3 = memref3 - -
6547 S1: x = load - VS1_0
6548 S2: z = x + 1 - -
6550 step2: vectorize stmt S2 (done here):
6551 To vectorize stmt S2 we first need to find the relevant vector
6552 def for the first operand 'x'. This is, as usual, obtained from
6553 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6554 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6555 relevant vector def 'vx0'. Having found 'vx0' we can generate
6556 the vector stmt VS2_0, and as usual, record it in the
6557 STMT_VINFO_VEC_STMT of stmt S2.
6558 When creating the second copy (VS2_1), we obtain the relevant vector
6559 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6560 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6561 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6562 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6563 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6564 chain of stmts and pointers:
6565 RELATED_STMT VEC_STMT
6566 VS1_0: vx0 = memref0 VS1_1 -
6567 VS1_1: vx1 = memref1 VS1_2 -
6568 VS1_2: vx2 = memref2 VS1_3 -
6569 VS1_3: vx3 = memref3 - -
6570 S1: x = load - VS1_0
6571 VS2_0: vz0 = vx0 + v1 VS2_1 -
6572 VS2_1: vz1 = vx1 + v1 VS2_2 -
6573 VS2_2: vz2 = vx2 + v1 VS2_3 -
6574 VS2_3: vz3 = vx3 + v1 - -
6575 S2: z = x + 1 - VS2_0 */
6577 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6578 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6579 /* Arguments are ready. Create the new vector stmt. */
6580 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6582 gimple *new_stmt = NULL;
6583 vop1 = ((op_type == binary_op || op_type == ternary_op)
6584 ? vec_oprnds1[i] : NULL_TREE);
6585 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6586 if (using_emulated_vectors_p
6587 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6589 /* Lower the operation. This follows vector lowering. */
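 /* Sketch of the trick used here (illustrative 2-byte example, QImode
 elements in one word): adding a = 0x00ff and b = 0x0001 directly in
 the word would carry into the upper byte and give 0x0100 instead of
 the per-element 0x0000. Instead mask off each element's top bit so
 carries cannot cross element boundaries, add the low parts, and patch
 the top bits back in with an XOR of the sign bits:
 low_bits = 0x7f7f high_bits = 0x8080
 a_low = a & low_bits = 0x007f
 b_low = b & low_bits = 0x0001
 result_low = a_low + b_low = 0x0080
 signs = (a ^ b) & high_bits = 0x0080
 result = result_low ^ signs = 0x0000
 which is the correct wrapped per-element sum. */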
6590 unsigned int width = vector_element_bits (vectype);
6591 tree inner_type = TREE_TYPE (vectype);
6592 tree word_type
6593 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6594 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6595 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6596 tree high_bits
6597 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6598 tree wvop0 = make_ssa_name (word_type);
6599 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6600 build1 (VIEW_CONVERT_EXPR,
6601 word_type, vop0));
6602 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6603 tree result_low, signs;
6604 if (code == PLUS_EXPR || code == MINUS_EXPR)
6606 tree wvop1 = make_ssa_name (word_type);
6607 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6608 build1 (VIEW_CONVERT_EXPR,
6609 word_type, vop1));
6610 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6611 signs = make_ssa_name (word_type);
6612 new_stmt = gimple_build_assign (signs,
6613 BIT_XOR_EXPR, wvop0, wvop1);
6614 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6615 tree b_low = make_ssa_name (word_type);
6616 new_stmt = gimple_build_assign (b_low,
6617 BIT_AND_EXPR, wvop1, low_bits);
6618 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6619 tree a_low = make_ssa_name (word_type);
6620 if (code == PLUS_EXPR)
6621 new_stmt = gimple_build_assign (a_low,
6622 BIT_AND_EXPR, wvop0, low_bits);
6623 else
6624 new_stmt = gimple_build_assign (a_low,
6625 BIT_IOR_EXPR, wvop0, high_bits);
6626 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6627 if (code == MINUS_EXPR)
6629 new_stmt = gimple_build_assign (NULL_TREE,
6630 BIT_NOT_EXPR, signs);
6631 signs = make_ssa_name (word_type);
6632 gimple_assign_set_lhs (new_stmt, signs);
6633 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6635 new_stmt = gimple_build_assign (NULL_TREE,
6636 BIT_AND_EXPR, signs, high_bits);
6637 signs = make_ssa_name (word_type);
6638 gimple_assign_set_lhs (new_stmt, signs);
6639 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6640 result_low = make_ssa_name (word_type);
6641 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6642 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6644 else
6646 tree a_low = make_ssa_name (word_type);
6647 new_stmt = gimple_build_assign (a_low,
6648 BIT_AND_EXPR, wvop0, low_bits);
6649 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6650 signs = make_ssa_name (word_type);
6651 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6652 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6653 new_stmt = gimple_build_assign (NULL_TREE,
6654 BIT_AND_EXPR, signs, high_bits);
6655 signs = make_ssa_name (word_type);
6656 gimple_assign_set_lhs (new_stmt, signs);
6657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6658 result_low = make_ssa_name (word_type);
6659 new_stmt = gimple_build_assign (result_low,
6660 MINUS_EXPR, high_bits, a_low);
6661 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6663 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6664 signs);
6665 result_low = make_ssa_name (word_type);
6666 gimple_assign_set_lhs (new_stmt, result_low);
6667 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6668 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6669 build1 (VIEW_CONVERT_EXPR,
6670 vectype, result_low));
6671 new_temp = make_ssa_name (vectype);
6672 gimple_assign_set_lhs (new_stmt, new_temp);
6673 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6675 else if (masked_loop_p && mask_out_inactive)
6677 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6678 vectype, i);
6679 auto_vec<tree> vops (5);
6680 vops.quick_push (mask);
6681 vops.quick_push (vop0);
6682 if (vop1)
6683 vops.quick_push (vop1);
6684 if (vop2)
6685 vops.quick_push (vop2);
6686 if (reduc_idx >= 0)
6688 /* Perform the operation on active elements only and take
6689 inactive elements from the reduction chain input. */
6690 gcc_assert (!vop2);
6691 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6693 else
6695 auto else_value = targetm.preferred_else_value
6696 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6697 vops.quick_push (else_value);
6699 gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6700 new_temp = make_ssa_name (vec_dest, call);
6701 gimple_call_set_lhs (call, new_temp);
6702 gimple_call_set_nothrow (call, true);
6703 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6704 new_stmt = call;
6706 else
6708 tree mask = NULL_TREE;
6709 /* When combining two masks, check whether either of them is elsewhere
6710 combined with a loop mask; if so, we can mark the new combined
6711 mask as not needing to be combined with a loop mask again. */
6712 if (masked_loop_p
6713 && code == BIT_AND_EXPR
6714 && VECTOR_BOOLEAN_TYPE_P (vectype))
6716 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6717 ncopies}))
6719 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6720 vectype, i);
6722 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6723 vop0, gsi);
6726 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6727 ncopies }))
6729 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6730 vectype, i);
6732 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6733 vop1, gsi);
6737 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6738 new_temp = make_ssa_name (vec_dest, new_stmt);
6739 gimple_assign_set_lhs (new_stmt, new_temp);
6740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6741 if (using_emulated_vectors_p)
6742 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6744 /* Enter the combined value into the vector cond hash so we don't
6745 AND it with a loop mask again. */
6746 if (mask)
6747 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6750 if (vec_cvt_dest)
6752 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6753 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6754 new_temp);
6755 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6756 gimple_assign_set_lhs (new_stmt, new_temp);
6757 vect_finish_stmt_generation (vinfo, stmt_info,
6758 new_stmt, gsi);
6761 if (slp_node)
6762 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6763 else
6764 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6767 if (!slp_node)
6768 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6770 vec_oprnds0.release ();
6771 vec_oprnds1.release ();
6772 vec_oprnds2.release ();
6774 return true;
6777 /* A helper function to ensure data reference DR_INFO's base alignment. */
6779 static void
6780 ensure_base_align (dr_vec_info *dr_info)
6782 /* Alignment is only analyzed for the first element of a DR group;
6783 use that to determine the base alignment we need to enforce. */
6784 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6785 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6787 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6789 if (dr_info->base_misaligned)
6791 tree base_decl = dr_info->base_decl;
6793 // We should only be able to increase the alignment of a base object if
6794 // we know what its new alignment should be at compile time.
6795 unsigned HOST_WIDE_INT align_base_to =
6796 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6798 if (decl_in_symtab_p (base_decl))
6799 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6800 else if (DECL_ALIGN (base_decl) < align_base_to)
6802 SET_DECL_ALIGN (base_decl, align_base_to);
6803 DECL_USER_ALIGN (base_decl) = 1;
6805 dr_info->base_misaligned = false;
6810 /* Function get_group_alias_ptr_type.
6812 Return the alias type for the group starting at FIRST_STMT_INFO. */
6814 static tree
6815 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6817 struct data_reference *first_dr, *next_dr;
6819 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6820 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6821 while (next_stmt_info)
6823 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6824 if (get_alias_set (DR_REF (first_dr))
6825 != get_alias_set (DR_REF (next_dr)))
6827 if (dump_enabled_p ())
6828 dump_printf_loc (MSG_NOTE, vect_location,
6829 "conflicting alias set types.\n");
6830 return ptr_type_node;
6832 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6834 return reference_alias_ptr_type (DR_REF (first_dr));
6838 /* Function scan_operand_equal_p.
6840 Helper function for check_scan_store. Compare two references
6841 with .GOMP_SIMD_LANE bases. */
6843 static bool
6844 scan_operand_equal_p (tree ref1, tree ref2)
6846 tree ref[2] = { ref1, ref2 };
6847 poly_int64 bitsize[2], bitpos[2];
6848 tree offset[2], base[2];
6849 for (int i = 0; i < 2; ++i)
6851 machine_mode mode;
6852 int unsignedp, reversep, volatilep = 0;
6853 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6854 &offset[i], &mode, &unsignedp,
6855 &reversep, &volatilep);
6856 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6857 return false;
6858 if (TREE_CODE (base[i]) == MEM_REF
6859 && offset[i] == NULL_TREE
6860 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6862 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6863 if (is_gimple_assign (def_stmt)
6864 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6865 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6866 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6868 if (maybe_ne (mem_ref_offset (base[i]), 0))
6869 return false;
6870 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6871 offset[i] = gimple_assign_rhs2 (def_stmt);
6876 if (!operand_equal_p (base[0], base[1], 0))
6877 return false;
6878 if (maybe_ne (bitsize[0], bitsize[1]))
6879 return false;
6880 if (offset[0] != offset[1])
6882 if (!offset[0] || !offset[1])
6883 return false;
6884 if (!operand_equal_p (offset[0], offset[1], 0))
6886 tree step[2];
6887 for (int i = 0; i < 2; ++i)
6889 step[i] = integer_one_node;
6890 if (TREE_CODE (offset[i]) == SSA_NAME)
6892 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6893 if (is_gimple_assign (def_stmt)
6894 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6895 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6896 == INTEGER_CST))
6898 step[i] = gimple_assign_rhs2 (def_stmt);
6899 offset[i] = gimple_assign_rhs1 (def_stmt);
6902 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6904 step[i] = TREE_OPERAND (offset[i], 1);
6905 offset[i] = TREE_OPERAND (offset[i], 0);
6907 tree rhs1 = NULL_TREE;
6908 if (TREE_CODE (offset[i]) == SSA_NAME)
6910 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6911 if (gimple_assign_cast_p (def_stmt))
6912 rhs1 = gimple_assign_rhs1 (def_stmt);
6914 else if (CONVERT_EXPR_P (offset[i]))
6915 rhs1 = TREE_OPERAND (offset[i], 0);
6916 if (rhs1
6917 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6918 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6919 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6920 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6921 offset[i] = rhs1;
6923 if (!operand_equal_p (offset[0], offset[1], 0)
6924 || !operand_equal_p (step[0], step[1], 0))
6925 return false;
6928 return true;
6932 enum scan_store_kind {
6933 /* Normal permutation. */
6934 scan_store_kind_perm,
6936 /* Whole vector left shift permutation with zero init. */
6937 scan_store_kind_lshift_zero,
6939 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6940 scan_store_kind_lshift_cond
6943 /* Function scan_store_can_perm_p.
6945 Verify if we can perform the needed permutations or whole vector shifts.
6946 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6947 USE_WHOLE_VECTOR is a vector of enum scan_store_kind describing which
6948 operation to do at each step. */
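/* Illustrative example: for an 8-lane vector, units_log2 is 3 and the
 loop below checks the permutations of a Hillis-Steele style inclusive
 scan, i.e. three "shift by 1, 2 and 4 lanes and combine" steps plus a
 final permutation that broadcasts the last lane; where a permutation
 is not directly supported, a whole-vector shift (which shifts in
 zeros), possibly followed by a VEC_COND_EXPR to reinstate the
 initializer, is tried instead. */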
6950 static int
6951 scan_store_can_perm_p (tree vectype, tree init,
6952 vec<enum scan_store_kind> *use_whole_vector = NULL)
6954 enum machine_mode vec_mode = TYPE_MODE (vectype);
6955 unsigned HOST_WIDE_INT nunits;
6956 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6957 return -1;
6958 int units_log2 = exact_log2 (nunits);
6959 if (units_log2 <= 0)
6960 return -1;
6962 int i;
6963 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6964 for (i = 0; i <= units_log2; ++i)
6966 unsigned HOST_WIDE_INT j, k;
6967 enum scan_store_kind kind = scan_store_kind_perm;
6968 vec_perm_builder sel (nunits, nunits, 1);
6969 sel.quick_grow (nunits);
6970 if (i == units_log2)
6972 for (j = 0; j < nunits; ++j)
6973 sel[j] = nunits - 1;
6975 else
6977 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6978 sel[j] = j;
6979 for (k = 0; j < nunits; ++j, ++k)
6980 sel[j] = nunits + k;
6982 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6983 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6985 if (i == units_log2)
6986 return -1;
6988 if (whole_vector_shift_kind == scan_store_kind_perm)
6990 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6991 return -1;
6992 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6993 /* Whole vector shifts shift in zeros, so if init is an all-zero
6994 constant, there is no need to do anything further. */
6995 if ((TREE_CODE (init) != INTEGER_CST
6996 && TREE_CODE (init) != REAL_CST)
6997 || !initializer_zerop (init))
6999 tree masktype = truth_type_for (vectype);
7000 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7001 return -1;
7002 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7005 kind = whole_vector_shift_kind;
7007 if (use_whole_vector)
7009 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7010 use_whole_vector->safe_grow_cleared (i, true);
7011 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7012 use_whole_vector->safe_push (kind);
7016 return units_log2;
7020 /* Function check_scan_store.
7022 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7024 static bool
7025 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7026 enum vect_def_type rhs_dt, bool slp, tree mask,
7027 vect_memory_access_type memory_access_type)
7029 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7030 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7031 tree ref_type;
7033 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7034 if (slp
7035 || mask
7036 || memory_access_type != VMAT_CONTIGUOUS
7037 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7038 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7039 || loop_vinfo == NULL
7040 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7041 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7042 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7043 || !integer_zerop (DR_INIT (dr_info->dr))
7044 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7045 || !alias_sets_conflict_p (get_alias_set (vectype),
7046 get_alias_set (TREE_TYPE (ref_type))))
7048 if (dump_enabled_p ())
7049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7050 "unsupported OpenMP scan store.\n");
7051 return false;
7054 /* We need to pattern match code built by OpenMP lowering and simplified
7055 by subsequent optimizations into something we can handle.
7056 #pragma omp simd reduction(inscan,+:r)
7057 for (...)
7059 r += something ();
7060 #pragma omp scan inclusive (r)
7061 use (r);
7063 shall have body with:
7064 // Initialization for input phase, store the reduction initializer:
7065 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7066 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7067 D.2042[_21] = 0;
7068 // Actual input phase:
7070 r.0_5 = D.2042[_20];
7071 _6 = _4 + r.0_5;
7072 D.2042[_20] = _6;
7073 // Initialization for scan phase:
7074 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7075 _26 = D.2043[_25];
7076 _27 = D.2042[_25];
7077 _28 = _26 + _27;
7078 D.2043[_25] = _28;
7079 D.2042[_25] = _28;
7080 // Actual scan phase:
7082 r.1_8 = D.2042[_20];
7084 The "omp simd array" variable D.2042 holds the privatized copy used
7085 inside of the loop and D.2043 is another one that holds copies of
7086 the current original list item. The separate GOMP_SIMD_LANE ifn
7087 kinds are there in order to allow optimizing the initializer store
7088 and combiner sequence, e.g. if it is originally some C++ish user
7089 defined reduction, while still allowing the vectorizer to pattern
7090 recognize it and turn it into the appropriate vectorized scan.
7092 For exclusive scan, this is slightly different:
7093 #pragma omp simd reduction(inscan,+:r)
7094 for (...)
7096 use (r);
7097 #pragma omp scan exclusive (r)
7098 r += something ();
7100 shall have body with:
7101 // Initialization for input phase, store the reduction initializer:
7102 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7103 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7104 D.2042[_21] = 0;
7105 // Actual input phase:
7107 r.0_5 = D.2042[_20];
7108 _6 = _4 + r.0_5;
7109 D.2042[_20] = _6;
7110 // Initialization for scan phase:
7111 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7112 _26 = D.2043[_25];
7113 D.2044[_25] = _26;
7114 _27 = D.2042[_25];
7115 _28 = _26 + _27;
7116 D.2043[_25] = _28;
7117 // Actual scan phase:
7119 r.1_8 = D.2044[_20];
7120 ... */
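/* In the checks below STMT_VINFO_SIMD_LANE_ACCESS_P distinguishes the magic
   stores matched above: 2 for the initializer store (D.2042[_21] = 0), 3 for
   the combiner stores of an inclusive scan and 4 for those of an exclusive
   scan.  */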
7122 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7124 /* Match the D.2042[_21] = 0; store above. Just require that
7125 it is a constant or external definition store. */
7126 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7128 fail_init:
7129 if (dump_enabled_p ())
7130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7131 "unsupported OpenMP scan initializer store.\n");
7132 return false;
7135 if (! loop_vinfo->scan_map)
7136 loop_vinfo->scan_map = new hash_map<tree, tree>;
7137 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7138 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7139 if (cached)
7140 goto fail_init;
7141 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7143 /* These stores can be vectorized normally. */
7144 return true;
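/* From here on we match the combiner sequence of the scan phase, i.e. stores
   with STMT_VINFO_SIMD_LANE_ACCESS_P of 3 or 4.  */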
7147 if (rhs_dt != vect_internal_def)
7149 fail:
7150 if (dump_enabled_p ())
7151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7152 "unsupported OpenMP scan combiner pattern.\n");
7153 return false;
7156 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7157 tree rhs = gimple_assign_rhs1 (stmt);
7158 if (TREE_CODE (rhs) != SSA_NAME)
7159 goto fail;
7161 gimple *other_store_stmt = NULL;
7162 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7163 bool inscan_var_store
7164 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7166 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7168 if (!inscan_var_store)
7170 use_operand_p use_p;
7171 imm_use_iterator iter;
7172 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7174 gimple *use_stmt = USE_STMT (use_p);
7175 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7176 continue;
7177 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7178 || !is_gimple_assign (use_stmt)
7179 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7180 || other_store_stmt
7181 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7182 goto fail;
7183 other_store_stmt = use_stmt;
7185 if (other_store_stmt == NULL)
7186 goto fail;
7187 rhs = gimple_assign_lhs (other_store_stmt);
7188 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7189 goto fail;
7192 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7194 use_operand_p use_p;
7195 imm_use_iterator iter;
7196 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7198 gimple *use_stmt = USE_STMT (use_p);
7199 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7200 continue;
7201 if (other_store_stmt)
7202 goto fail;
7203 other_store_stmt = use_stmt;
7206 else
7207 goto fail;
7209 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7210 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7211 || !is_gimple_assign (def_stmt)
7212 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7213 goto fail;
7215 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7216 /* For pointer addition, we should use the normal plus for the vector
7217 operation. */
7218 switch (code)
7220 case POINTER_PLUS_EXPR:
7221 code = PLUS_EXPR;
7222 break;
7223 case MULT_HIGHPART_EXPR:
7224 goto fail;
7225 default:
7226 break;
7228 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7229 goto fail;
7231 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7232 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7233 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7234 goto fail;
7236 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7237 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7238 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7239 || !gimple_assign_load_p (load1_stmt)
7240 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7241 || !gimple_assign_load_p (load2_stmt))
7242 goto fail;
7244 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7245 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7246 if (load1_stmt_info == NULL
7247 || load2_stmt_info == NULL
7248 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7249 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7250 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7251 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7252 goto fail;
7254 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7256 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7257 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7258 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7259 goto fail;
7260 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7261 tree lrhs;
7262 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7263 lrhs = rhs1;
7264 else
7265 lrhs = rhs2;
7266 use_operand_p use_p;
7267 imm_use_iterator iter;
7268 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7270 gimple *use_stmt = USE_STMT (use_p);
7271 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7272 continue;
7273 if (other_store_stmt)
7274 goto fail;
7275 other_store_stmt = use_stmt;
7279 if (other_store_stmt == NULL)
7280 goto fail;
7281 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7282 || !gimple_store_p (other_store_stmt))
7283 goto fail;
7285 stmt_vec_info other_store_stmt_info
7286 = loop_vinfo->lookup_stmt (other_store_stmt);
7287 if (other_store_stmt_info == NULL
7288 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7289 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7290 goto fail;
7292 gimple *stmt1 = stmt;
7293 gimple *stmt2 = other_store_stmt;
7294 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7295 std::swap (stmt1, stmt2);
7296 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7297 gimple_assign_rhs1 (load2_stmt)))
7299 std::swap (rhs1, rhs2);
7300 std::swap (load1_stmt, load2_stmt);
7301 std::swap (load1_stmt_info, load2_stmt_info);
7303 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7304 gimple_assign_rhs1 (load1_stmt)))
7305 goto fail;
7307 tree var3 = NULL_TREE;
7308 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7309 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7310 gimple_assign_rhs1 (load2_stmt)))
7311 goto fail;
7312 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7314 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7315 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7316 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7317 goto fail;
7318 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7319 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7320 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7321 || lookup_attribute ("omp simd inscan exclusive",
7322 DECL_ATTRIBUTES (var3)))
7323 goto fail;
7326 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7327 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7328 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7329 goto fail;
7331 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7332 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7333 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7334 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7335 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7336 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7337 goto fail;
7339 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7340 std::swap (var1, var2);
7342 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7344 if (!lookup_attribute ("omp simd inscan exclusive",
7345 DECL_ATTRIBUTES (var1)))
7346 goto fail;
7347 var1 = var3;
7350 if (loop_vinfo->scan_map == NULL)
7351 goto fail;
7352 tree *init = loop_vinfo->scan_map->get (var1);
7353 if (init == NULL)
7354 goto fail;
7356 /* The IL is as expected, now check if we can actually vectorize it.
7357 Inclusive scan:
7358 _26 = D.2043[_25];
7359 _27 = D.2042[_25];
7360 _28 = _26 + _27;
7361 D.2043[_25] = _28;
7362 D.2042[_25] = _28;
7363 should be vectorized as (where _40 is the vectorized rhs
7364 from the D.2042[_21] = 0; store):
7365 _30 = MEM <vector(8) int> [(int *)&D.2043];
7366 _31 = MEM <vector(8) int> [(int *)&D.2042];
7367 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7368 _33 = _31 + _32;
7369 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7370 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7371 _35 = _33 + _34;
7372 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7373 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7374 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7375 _37 = _35 + _36;
7376 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7377 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7378 _38 = _30 + _37;
7379 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7380 MEM <vector(8) int> [(int *)&D.2043] = _39;
7381 MEM <vector(8) int> [(int *)&D.2042] = _38;
7382 Exclusive scan:
7383 _26 = D.2043[_25];
7384 D.2044[_25] = _26;
7385 _27 = D.2042[_25];
7386 _28 = _26 + _27;
7387 D.2043[_25] = _28;
7388 should be vectorized as (where _40 is the vectorized rhs
7389 from the D.2042[_21] = 0; store):
7390 _30 = MEM <vector(8) int> [(int *)&D.2043];
7391 _31 = MEM <vector(8) int> [(int *)&D.2042];
7392 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7393 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7394 _34 = _32 + _33;
7395 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7396 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7397 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7398 _36 = _34 + _35;
7399 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7400 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7401 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7402 _38 = _36 + _37;
7403 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7404 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7405 _39 = _30 + _38;
7406 _50 = _31 + _39;
7407 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7408 MEM <vector(8) int> [(int *)&D.2044] = _39;
7409 MEM <vector(8) int> [(int *)&D.2042] = _51; */
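/* All that remains is to check that the target can carry this out: the
   combiner operation needs a vector optab for this mode, and
   scan_store_can_perm_p must confirm that the required permutations (or
   whole-vector shifts) are supported.  */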
7410 enum machine_mode vec_mode = TYPE_MODE (vectype);
7411 optab optab = optab_for_tree_code (code, vectype, optab_default);
7412 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7413 goto fail;
7415 int units_log2 = scan_store_can_perm_p (vectype, *init);
7416 if (units_log2 == -1)
7417 goto fail;
7419 return true;
7423 /* Function vectorizable_scan_store.
7425 Helper of vectorizable_store; arguments are as for vectorizable_store.
7426 Handle only the transformation; the checking is done in check_scan_store. */
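/* Per copy this emits UNITS_LOG2 permute-and-add steps (using whole-vector
   shifts and VEC_COND_EXPRs where scan_store_can_perm_p asked for them),
   followed by a broadcast of the last lane, mirroring the sequences shown
   in the comment in check_scan_store.  */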
7428 static bool
7429 vectorizable_scan_store (vec_info *vinfo,
7430 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7431 gimple **vec_stmt, int ncopies)
7433 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7434 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7435 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7436 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7438 if (dump_enabled_p ())
7439 dump_printf_loc (MSG_NOTE, vect_location,
7440 "transform scan store. ncopies = %d\n", ncopies);
7442 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7443 tree rhs = gimple_assign_rhs1 (stmt);
7444 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7446 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7447 bool inscan_var_store
7448 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7450 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7452 use_operand_p use_p;
7453 imm_use_iterator iter;
7454 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7456 gimple *use_stmt = USE_STMT (use_p);
7457 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7458 continue;
7459 rhs = gimple_assign_lhs (use_stmt);
7460 break;
7464 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7465 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7466 if (code == POINTER_PLUS_EXPR)
7467 code = PLUS_EXPR;
7468 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7469 && commutative_tree_code (code));
7470 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7471 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7472 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7473 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7474 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7475 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7476 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7477 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7478 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7479 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7480 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7482 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7484 std::swap (rhs1, rhs2);
7485 std::swap (var1, var2);
7486 std::swap (load1_dr_info, load2_dr_info);
7489 tree *init = loop_vinfo->scan_map->get (var1);
7490 gcc_assert (init);
7492 unsigned HOST_WIDE_INT nunits;
7493 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7494 gcc_unreachable ();
7495 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7496 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7497 gcc_assert (units_log2 > 0);
7498 auto_vec<tree, 16> perms;
7499 perms.quick_grow (units_log2 + 1);
7500 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7501 for (int i = 0; i <= units_log2; ++i)
7503 unsigned HOST_WIDE_INT j, k;
7504 vec_perm_builder sel (nunits, nunits, 1);
7505 sel.quick_grow (nunits);
7506 if (i == units_log2)
7507 for (j = 0; j < nunits; ++j)
7508 sel[j] = nunits - 1;
7509 else
7511 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7512 sel[j] = j;
7513 for (k = 0; j < nunits; ++j, ++k)
7514 sel[j] = nunits + k;
7516 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7517 if (!use_whole_vector.is_empty ()
7518 && use_whole_vector[i] != scan_store_kind_perm)
7520 if (zero_vec == NULL_TREE)
7521 zero_vec = build_zero_cst (vectype);
7522 if (masktype == NULL_TREE
7523 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7524 masktype = truth_type_for (vectype);
7525 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7527 else
7528 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7531 tree vec_oprnd1 = NULL_TREE;
7532 tree vec_oprnd2 = NULL_TREE;
7533 tree vec_oprnd3 = NULL_TREE;
7534 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7535 tree dataref_offset = build_int_cst (ref_type, 0);
7536 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7537 vectype, VMAT_CONTIGUOUS);
7538 tree ldataref_ptr = NULL_TREE;
7539 tree orig = NULL_TREE;
7540 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7541 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7542 auto_vec<tree> vec_oprnds1;
7543 auto_vec<tree> vec_oprnds2;
7544 auto_vec<tree> vec_oprnds3;
7545 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7546 *init, &vec_oprnds1,
7547 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7548 rhs2, &vec_oprnds3);
7549 for (int j = 0; j < ncopies; j++)
7551 vec_oprnd1 = vec_oprnds1[j];
7552 if (ldataref_ptr == NULL)
7553 vec_oprnd2 = vec_oprnds2[j];
7554 vec_oprnd3 = vec_oprnds3[j];
7555 if (j == 0)
7556 orig = vec_oprnd3;
7557 else if (!inscan_var_store)
7558 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7560 if (ldataref_ptr)
7562 vec_oprnd2 = make_ssa_name (vectype);
7563 tree data_ref = fold_build2 (MEM_REF, vectype,
7564 unshare_expr (ldataref_ptr),
7565 dataref_offset);
7566 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7567 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7568 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7569 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7570 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7573 tree v = vec_oprnd2;
7574 for (int i = 0; i < units_log2; ++i)
7576 tree new_temp = make_ssa_name (vectype);
7577 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7578 (zero_vec
7579 && (use_whole_vector[i]
7580 != scan_store_kind_perm))
7581 ? zero_vec : vec_oprnd1, v,
7582 perms[i]);
7583 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7584 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7585 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7587 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7589 /* The whole vector shift shifts in zeros, but if *init
7590 is not initializer_zerop, we need to replace those elements
7591 with elements from vec_oprnd1. */
7592 tree_vector_builder vb (masktype, nunits, 1);
7593 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7594 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7595 ? boolean_false_node : boolean_true_node);
7597 tree new_temp2 = make_ssa_name (vectype);
7598 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7599 new_temp, vec_oprnd1);
7600 vect_finish_stmt_generation (vinfo, stmt_info,
7601 g, gsi);
7602 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7603 new_temp = new_temp2;
7606 /* For exclusive scan, perform the perms[i] permutation once
7607 more. */
7608 if (i == 0
7609 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7610 && v == vec_oprnd2)
7612 v = new_temp;
7613 --i;
7614 continue;
7617 tree new_temp2 = make_ssa_name (vectype);
7618 g = gimple_build_assign (new_temp2, code, v, new_temp);
7619 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7620 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7622 v = new_temp2;
7625 tree new_temp = make_ssa_name (vectype);
7626 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7627 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7628 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7630 tree last_perm_arg = new_temp;
7631 /* For exclusive scan, new_temp computed above is the exclusive scan
7632 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7633 of the last element into orig. */
7634 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7636 last_perm_arg = make_ssa_name (vectype);
7637 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7638 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7639 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7642 orig = make_ssa_name (vectype);
7643 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7644 last_perm_arg, perms[units_log2]);
7645 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7646 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7648 if (!inscan_var_store)
7650 tree data_ref = fold_build2 (MEM_REF, vectype,
7651 unshare_expr (dataref_ptr),
7652 dataref_offset);
7653 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7654 g = gimple_build_assign (data_ref, new_temp);
7655 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7656 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7660 if (inscan_var_store)
7661 for (int j = 0; j < ncopies; j++)
7663 if (j != 0)
7664 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7666 tree data_ref = fold_build2 (MEM_REF, vectype,
7667 unshare_expr (dataref_ptr),
7668 dataref_offset);
7669 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7670 gimple *g = gimple_build_assign (data_ref, orig);
7671 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7672 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7674 return true;
7678 /* Function vectorizable_store.
7680 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7681 that can be vectorized.
7682 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7683 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7684 Return true if STMT_INFO is vectorizable in this way. */
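/* The transformation below dispatches on the access kind: builtin-decl based
   scatters, OpenMP scan stores (via vectorizable_scan_store), elementwise and
   strided-SLP stores, store-lanes, IFN-based and emulated scatters, and plain
   contiguous stores, possibly masked or length-controlled.  */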
7686 static bool
7687 vectorizable_store (vec_info *vinfo,
7688 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7689 gimple **vec_stmt, slp_tree slp_node,
7690 stmt_vector_for_cost *cost_vec)
7692 tree data_ref;
7693 tree op;
7694 tree vec_oprnd = NULL_TREE;
7695 tree elem_type;
7696 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7697 class loop *loop = NULL;
7698 machine_mode vec_mode;
7699 tree dummy;
7700 enum vect_def_type rhs_dt = vect_unknown_def_type;
7701 enum vect_def_type mask_dt = vect_unknown_def_type;
7702 tree dataref_ptr = NULL_TREE;
7703 tree dataref_offset = NULL_TREE;
7704 gimple *ptr_incr = NULL;
7705 int ncopies;
7706 int j;
7707 stmt_vec_info first_stmt_info;
7708 bool grouped_store;
7709 unsigned int group_size, i;
7710 vec<tree> oprnds = vNULL;
7711 vec<tree> result_chain = vNULL;
7712 vec<tree> vec_oprnds = vNULL;
7713 bool slp = (slp_node != NULL);
7714 unsigned int vec_num;
7715 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7716 tree aggr_type;
7717 gather_scatter_info gs_info;
7718 poly_uint64 vf;
7719 vec_load_store_type vls_type;
7720 tree ref_type;
7722 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7723 return false;
7725 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7726 && ! vec_stmt)
7727 return false;
7729 /* Is vectorizable store? */
7731 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7732 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7734 tree scalar_dest = gimple_assign_lhs (assign);
7735 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7736 && is_pattern_stmt_p (stmt_info))
7737 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7738 if (TREE_CODE (scalar_dest) != ARRAY_REF
7739 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7740 && TREE_CODE (scalar_dest) != INDIRECT_REF
7741 && TREE_CODE (scalar_dest) != COMPONENT_REF
7742 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7743 && TREE_CODE (scalar_dest) != REALPART_EXPR
7744 && TREE_CODE (scalar_dest) != MEM_REF)
7745 return false;
7747 else
7749 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7750 if (!call || !gimple_call_internal_p (call))
7751 return false;
7753 internal_fn ifn = gimple_call_internal_fn (call);
7754 if (!internal_store_fn_p (ifn))
7755 return false;
7757 if (slp_node != NULL)
7759 if (dump_enabled_p ())
7760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7761 "SLP of masked stores not supported.\n");
7762 return false;
7765 int mask_index = internal_fn_mask_index (ifn);
7766 if (mask_index >= 0
7767 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7768 &mask, NULL, &mask_dt, &mask_vectype))
7769 return false;
7772 op = vect_get_store_rhs (stmt_info);
7774 /* Cannot have hybrid store SLP -- that would mean storing to the
7775 same location twice. */
7776 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7778 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7779 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7781 if (loop_vinfo)
7783 loop = LOOP_VINFO_LOOP (loop_vinfo);
7784 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7786 else
7787 vf = 1;
7789 /* Multiple types in SLP are handled by creating the appropriate number of
7790 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7791 case of SLP. */
7792 if (slp)
7793 ncopies = 1;
7794 else
7795 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7797 gcc_assert (ncopies >= 1);
7799 /* FORNOW. This restriction should be relaxed. */
7800 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7802 if (dump_enabled_p ())
7803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7804 "multiple types in nested loop.\n");
7805 return false;
7808 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7809 op, &rhs_dt, &rhs_vectype, &vls_type))
7810 return false;
7812 elem_type = TREE_TYPE (vectype);
7813 vec_mode = TYPE_MODE (vectype);
7815 if (!STMT_VINFO_DATA_REF (stmt_info))
7816 return false;
7818 vect_memory_access_type memory_access_type;
7819 enum dr_alignment_support alignment_support_scheme;
7820 int misalignment;
7821 poly_int64 poffset;
7822 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7823 ncopies, &memory_access_type, &poffset,
7824 &alignment_support_scheme, &misalignment, &gs_info))
7825 return false;
7827 if (mask)
7829 if (memory_access_type == VMAT_CONTIGUOUS)
7831 if (!VECTOR_MODE_P (vec_mode)
7832 || !can_vec_mask_load_store_p (vec_mode,
7833 TYPE_MODE (mask_vectype), false))
7834 return false;
7836 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7837 && (memory_access_type != VMAT_GATHER_SCATTER
7838 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7840 if (dump_enabled_p ())
7841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7842 "unsupported access type for masked store.\n");
7843 return false;
7845 else if (memory_access_type == VMAT_GATHER_SCATTER
7846 && gs_info.ifn == IFN_LAST
7847 && !gs_info.decl)
7849 if (dump_enabled_p ())
7850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7851 "unsupported masked emulated scatter.\n");
7852 return false;
7855 else
7857 /* FORNOW. In some cases we can vectorize even if the data type is not
7858 supported (e.g. array initialization with 0). */
7859 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7860 return false;
7863 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7864 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7865 && memory_access_type != VMAT_GATHER_SCATTER
7866 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7867 if (grouped_store)
7869 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7870 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7871 group_size = DR_GROUP_SIZE (first_stmt_info);
7873 else
7875 first_stmt_info = stmt_info;
7876 first_dr_info = dr_info;
7877 group_size = vec_num = 1;
7880 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7882 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7883 memory_access_type))
7884 return false;
7887 if (!vec_stmt) /* transformation not required. */
7889 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7891 if (loop_vinfo
7892 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7893 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7894 vls_type, group_size,
7895 memory_access_type, &gs_info,
7896 mask);
7898 if (slp_node
7899 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7900 vectype))
7902 if (dump_enabled_p ())
7903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7904 "incompatible vector types for invariants\n");
7905 return false;
7908 if (dump_enabled_p ()
7909 && memory_access_type != VMAT_ELEMENTWISE
7910 && memory_access_type != VMAT_GATHER_SCATTER
7911 && alignment_support_scheme != dr_aligned)
7912 dump_printf_loc (MSG_NOTE, vect_location,
7913 "Vectorizing an unaligned access.\n");
7915 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7916 vect_model_store_cost (vinfo, stmt_info, ncopies,
7917 memory_access_type, &gs_info,
7918 alignment_support_scheme,
7919 misalignment, vls_type, slp_node, cost_vec);
7920 return true;
7922 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7924 /* Transform. */
7926 ensure_base_align (dr_info);
7928 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7930 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7931 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7932 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7933 tree ptr, var, scale, vec_mask;
7934 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7935 tree mask_halfvectype = mask_vectype;
7936 edge pe = loop_preheader_edge (loop);
7937 gimple_seq seq;
7938 basic_block new_bb;
7939 enum { NARROW, NONE, WIDEN } modifier;
7940 poly_uint64 scatter_off_nunits
7941 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7943 if (known_eq (nunits, scatter_off_nunits))
7944 modifier = NONE;
7945 else if (known_eq (nunits * 2, scatter_off_nunits))
7947 modifier = WIDEN;
7949 /* Currently gathers and scatters are only supported for
7950 fixed-length vectors. */
7951 unsigned int count = scatter_off_nunits.to_constant ();
7952 vec_perm_builder sel (count, count, 1);
7953 for (i = 0; i < (unsigned int) count; ++i)
7954 sel.quick_push (i | (count / 2));
7956 vec_perm_indices indices (sel, 1, count);
7957 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7958 indices);
7959 gcc_assert (perm_mask != NULL_TREE);
7961 else if (known_eq (nunits, scatter_off_nunits * 2))
7963 modifier = NARROW;
7965 /* Currently gathers and scatters are only supported for
7966 fixed-length vectors. */
7967 unsigned int count = nunits.to_constant ();
7968 vec_perm_builder sel (count, count, 1);
7969 for (i = 0; i < (unsigned int) count; ++i)
7970 sel.quick_push (i | (count / 2));
7972 vec_perm_indices indices (sel, 2, count);
7973 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7974 gcc_assert (perm_mask != NULL_TREE);
7975 ncopies *= 2;
7977 if (mask)
7978 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7980 else
7981 gcc_unreachable ();
7983 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7984 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7985 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7986 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7987 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7988 scaletype = TREE_VALUE (arglist);
7990 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7991 && TREE_CODE (rettype) == VOID_TYPE);
7993 ptr = fold_convert (ptrtype, gs_info.base);
7994 if (!is_gimple_min_invariant (ptr))
7996 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7997 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7998 gcc_assert (!new_bb);
8001 if (mask == NULL_TREE)
8003 mask_arg = build_int_cst (masktype, -1);
8004 mask_arg = vect_init_vector (vinfo, stmt_info,
8005 mask_arg, masktype, NULL);
8008 scale = build_int_cst (scaletype, gs_info.scale);
8010 auto_vec<tree> vec_oprnds0;
8011 auto_vec<tree> vec_oprnds1;
8012 auto_vec<tree> vec_masks;
8013 if (mask)
8015 tree mask_vectype = truth_type_for (vectype);
8016 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8017 modifier == NARROW
8018 ? ncopies / 2 : ncopies,
8019 mask, &vec_masks, mask_vectype);
8021 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8022 modifier == WIDEN
8023 ? ncopies / 2 : ncopies,
8024 gs_info.offset, &vec_oprnds0);
8025 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8026 modifier == NARROW
8027 ? ncopies / 2 : ncopies,
8028 op, &vec_oprnds1);
8029 for (j = 0; j < ncopies; ++j)
8031 if (modifier == WIDEN)
8033 if (j & 1)
8034 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
8035 perm_mask, stmt_info, gsi);
8036 else
8037 op = vec_oprnd0 = vec_oprnds0[j / 2];
8038 src = vec_oprnd1 = vec_oprnds1[j];
8039 if (mask)
8040 mask_op = vec_mask = vec_masks[j];
8042 else if (modifier == NARROW)
8044 if (j & 1)
8045 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
8046 perm_mask, stmt_info, gsi);
8047 else
8048 src = vec_oprnd1 = vec_oprnds1[j / 2];
8049 op = vec_oprnd0 = vec_oprnds0[j];
8050 if (mask)
8051 mask_op = vec_mask = vec_masks[j / 2];
8053 else
8055 op = vec_oprnd0 = vec_oprnds0[j];
8056 src = vec_oprnd1 = vec_oprnds1[j];
8057 if (mask)
8058 mask_op = vec_mask = vec_masks[j];
8061 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
8063 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
8064 TYPE_VECTOR_SUBPARTS (srctype)));
8065 var = vect_get_new_ssa_name (srctype, vect_simple_var);
8066 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
8067 gassign *new_stmt
8068 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
8069 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8070 src = var;
8073 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
8075 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
8076 TYPE_VECTOR_SUBPARTS (idxtype)));
8077 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
8078 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
8079 gassign *new_stmt
8080 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
8081 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8082 op = var;
8085 if (mask)
8087 tree utype;
8088 mask_arg = mask_op;
8089 if (modifier == NARROW)
8091 var = vect_get_new_ssa_name (mask_halfvectype,
8092 vect_simple_var);
8093 gassign *new_stmt
8094 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
8095 : VEC_UNPACK_LO_EXPR,
8096 mask_op);
8097 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8098 mask_arg = var;
8100 tree optype = TREE_TYPE (mask_arg);
8101 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
8102 utype = masktype;
8103 else
8104 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
8105 var = vect_get_new_ssa_name (utype, vect_scalar_var);
8106 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
8107 gassign *new_stmt
8108 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
8109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8110 mask_arg = var;
8111 if (!useless_type_conversion_p (masktype, utype))
8113 gcc_assert (TYPE_PRECISION (utype)
8114 <= TYPE_PRECISION (masktype));
8115 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
8116 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
8117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8118 mask_arg = var;
8122 gcall *new_stmt
8123 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
8124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8126 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8128 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8129 return true;
8131 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8132 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8134 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8135 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
8137 if (grouped_store)
8139 /* FORNOW */
8140 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8142 /* We vectorize all the stmts of the interleaving group when we
8143 reach the last stmt in the group. */
8144 if (DR_GROUP_STORE_COUNT (first_stmt_info)
8145 < DR_GROUP_SIZE (first_stmt_info)
8146 && !slp)
8148 *vec_stmt = NULL;
8149 return true;
8152 if (slp)
8154 grouped_store = false;
8155 /* VEC_NUM is the number of vect stmts to be created for this
8156 group. */
8157 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8158 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8159 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8160 == first_stmt_info);
8161 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8162 op = vect_get_store_rhs (first_stmt_info);
8164 else
8165 /* VEC_NUM is the number of vect stmts to be created for this
8166 group. */
8167 vec_num = group_size;
8169 ref_type = get_group_alias_ptr_type (first_stmt_info);
8171 else
8172 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8174 if (dump_enabled_p ())
8175 dump_printf_loc (MSG_NOTE, vect_location,
8176 "transform store. ncopies = %d\n", ncopies);
8178 if (memory_access_type == VMAT_ELEMENTWISE
8179 || memory_access_type == VMAT_STRIDED_SLP)
8181 gimple_stmt_iterator incr_gsi;
8182 bool insert_after;
8183 gimple *incr;
8184 tree offvar;
8185 tree ivstep;
8186 tree running_off;
8187 tree stride_base, stride_step, alias_off;
8188 tree vec_oprnd;
8189 tree dr_offset;
8190 unsigned int g;
8191 /* Checked by get_load_store_type. */
8192 unsigned int const_nunits = nunits.to_constant ();
8194 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8195 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8197 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8198 stride_base
8199 = fold_build_pointer_plus
8200 (DR_BASE_ADDRESS (first_dr_info->dr),
8201 size_binop (PLUS_EXPR,
8202 convert_to_ptrofftype (dr_offset),
8203 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8204 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8206 /* For a store with loop-invariant (but other than power-of-2)
8207 stride (i.e. not a grouped access) like so:
8209 for (i = 0; i < n; i += stride)
8210 array[i] = ...;
8212 we generate a new induction variable and new stores from
8213 the components of the (vectorized) rhs:
8215 for (j = 0; ; j += VF*stride)
8216 vectemp = ...;
8217 tmp1 = vectemp[0];
8218 array[j] = tmp1;
8219 tmp2 = vectemp[1];
8220 array[j + stride] = tmp2;
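/* NSTORES, LNEL and LTYPE below describe how each vector is split for these
   stores: NSTORES pieces of LNEL lanes each, stored with type LTYPE.  For SLP
   the group size decides whether several lanes can be stored at once or
   whether we fall back to an integer type of the same size.  */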
8224 unsigned nstores = const_nunits;
8225 unsigned lnel = 1;
8226 tree ltype = elem_type;
8227 tree lvectype = vectype;
8228 if (slp)
8230 if (group_size < const_nunits
8231 && const_nunits % group_size == 0)
8233 nstores = const_nunits / group_size;
8234 lnel = group_size;
8235 ltype = build_vector_type (elem_type, group_size);
8236 lvectype = vectype;
8238 /* First check if vec_extract optab doesn't support extraction
8239 of vector elts directly. */
8240 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8241 machine_mode vmode;
8242 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8243 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8244 group_size).exists (&vmode)
8245 || (convert_optab_handler (vec_extract_optab,
8246 TYPE_MODE (vectype), vmode)
8247 == CODE_FOR_nothing))
8249 /* Try to avoid emitting an extract of vector elements
8250 by performing the extracts using an integer type of the
8251 same size, extracting from a vector of those and then
8252 re-interpreting it as the original vector type if
8253 supported. */
8254 unsigned lsize
8255 = group_size * GET_MODE_BITSIZE (elmode);
8256 unsigned int lnunits = const_nunits / group_size;
8257 /* If we can't construct such a vector fall back to
8258 element extracts from the original vector type and
8259 element size stores. */
8260 if (int_mode_for_size (lsize, 0).exists (&elmode)
8261 && VECTOR_MODE_P (TYPE_MODE (vectype))
8262 && related_vector_mode (TYPE_MODE (vectype), elmode,
8263 lnunits).exists (&vmode)
8264 && (convert_optab_handler (vec_extract_optab,
8265 vmode, elmode)
8266 != CODE_FOR_nothing))
8268 nstores = lnunits;
8269 lnel = group_size;
8270 ltype = build_nonstandard_integer_type (lsize, 1);
8271 lvectype = build_vector_type (ltype, nstores);
8273 /* Else fall back to vector extraction anyway.
8274 Fewer stores are more important than avoiding spilling
8275 of the vector we extract from. Compared to the
8276 construction case in vectorizable_load no store-forwarding
8277 issue exists here for reasonable archs. */
8280 else if (group_size >= const_nunits
8281 && group_size % const_nunits == 0)
8283 nstores = 1;
8284 lnel = const_nunits;
8285 ltype = vectype;
8286 lvectype = vectype;
8288 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8289 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8292 ivstep = stride_step;
8293 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8294 build_int_cst (TREE_TYPE (ivstep), vf));
8296 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8298 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8299 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8300 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8301 loop, &incr_gsi, insert_after,
8302 &offvar, NULL);
8303 incr = gsi_stmt (incr_gsi);
8305 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8307 alias_off = build_int_cst (ref_type, 0);
8308 stmt_vec_info next_stmt_info = first_stmt_info;
8309 for (g = 0; g < group_size; g++)
8311 running_off = offvar;
8312 if (g)
8314 tree size = TYPE_SIZE_UNIT (ltype);
8315 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8316 size);
8317 tree newoff = copy_ssa_name (running_off, NULL);
8318 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8319 running_off, pos);
8320 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8321 running_off = newoff;
8323 if (!slp)
8324 op = vect_get_store_rhs (next_stmt_info);
8325 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8326 op, &vec_oprnds);
8327 unsigned int group_el = 0;
8328 unsigned HOST_WIDE_INT
8329 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8330 for (j = 0; j < ncopies; j++)
8332 vec_oprnd = vec_oprnds[j];
8333 /* Pun the vector to extract from if necessary. */
8334 if (lvectype != vectype)
8336 tree tem = make_ssa_name (lvectype);
8337 gimple *pun
8338 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8339 lvectype, vec_oprnd));
8340 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8341 vec_oprnd = tem;
8343 for (i = 0; i < nstores; i++)
8345 tree newref, newoff;
8346 gimple *incr, *assign;
8347 tree size = TYPE_SIZE (ltype);
8348 /* Extract the i'th component. */
8349 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8350 bitsize_int (i), size);
8351 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8352 size, pos);
8354 elem = force_gimple_operand_gsi (gsi, elem, true,
8355 NULL_TREE, true,
8356 GSI_SAME_STMT);
8358 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8359 group_el * elsz);
8360 newref = build2 (MEM_REF, ltype,
8361 running_off, this_off);
8362 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8364 /* And store it to *running_off. */
8365 assign = gimple_build_assign (newref, elem);
8366 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8368 group_el += lnel;
8369 if (! slp
8370 || group_el == group_size)
8372 newoff = copy_ssa_name (running_off, NULL);
8373 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8374 running_off, stride_step);
8375 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8377 running_off = newoff;
8378 group_el = 0;
8380 if (g == group_size - 1
8381 && !slp)
8383 if (j == 0 && i == 0)
8384 *vec_stmt = assign;
8385 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8389 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8390 vec_oprnds.release ();
8391 if (slp)
8392 break;
8395 return true;
8398 auto_vec<tree> dr_chain (group_size);
8399 oprnds.create (group_size);
8401 gcc_assert (alignment_support_scheme);
8402 vec_loop_masks *loop_masks
8403 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8404 ? &LOOP_VINFO_MASKS (loop_vinfo)
8405 : NULL);
8406 vec_loop_lens *loop_lens
8407 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8408 ? &LOOP_VINFO_LENS (loop_vinfo)
8409 : NULL);
8411 /* We shouldn't use the length-based approach if the loop is fully masked. */
8412 gcc_assert (!loop_lens || !loop_masks);
8414 /* Targets with store-lane instructions must not require explicit
8415 realignment. vect_supportable_dr_alignment always returns either
8416 dr_aligned or dr_unaligned_supported for masked operations. */
8417 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8418 && !mask
8419 && !loop_masks)
8420 || alignment_support_scheme == dr_aligned
8421 || alignment_support_scheme == dr_unaligned_supported);
8423 tree offset = NULL_TREE;
8424 if (!known_eq (poffset, 0))
8425 offset = size_int (poffset);
8427 tree bump;
8428 tree vec_offset = NULL_TREE;
8429 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8431 aggr_type = NULL_TREE;
8432 bump = NULL_TREE;
8434 else if (memory_access_type == VMAT_GATHER_SCATTER)
8436 aggr_type = elem_type;
8437 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8438 &bump, &vec_offset);
8440 else
8442 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8443 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8444 else
8445 aggr_type = vectype;
8446 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8447 memory_access_type);
8450 if (mask)
8451 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8453 /* In case the vectorization factor (VF) is bigger than the number
8454 of elements that we can fit in a vectype (nunits), we have to generate
8455 more than one vector stmt, i.e. we need to "unroll" the
8456 vector stmt by a factor VF/nunits. */
8458 /* In case of interleaving (non-unit grouped access):
8460 S1: &base + 2 = x2
8461 S2: &base = x0
8462 S3: &base + 1 = x1
8463 S4: &base + 3 = x3
8465 We create vectorized stores starting from the base address (the access of
8466 the first stmt in the chain, S2 in the above example), when the last store
8467 stmt of the chain (S4) is reached:
8469 VS1: &base = vx2
8470 VS2: &base + vec_size*1 = vx0
8471 VS3: &base + vec_size*2 = vx1
8472 VS4: &base + vec_size*3 = vx3
8474 Then permutation statements are generated:
8476 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8477 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8480 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8481 (the order of the data-refs in the output of vect_permute_store_chain
8482 corresponds to the order of scalar stmts in the interleaving chain - see
8483 the documentation of vect_permute_store_chain()).
8485 In case of both multiple types and interleaving, above vector stores and
8486 permutation stmts are created for every copy. The result vector stmts are
8487 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8488 STMT_VINFO_RELATED_STMT for the next copies.
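/* The loop below runs once per copy; for SLP, NCOPIES is 1 and VEC_NUM
   vector stmts are generated in that single iteration instead.  */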
8491 auto_vec<tree> vec_masks;
8492 tree vec_mask = NULL;
8493 auto_vec<tree> vec_offsets;
8494 auto_vec<vec<tree> > gvec_oprnds;
8495 gvec_oprnds.safe_grow_cleared (group_size, true);
8496 for (j = 0; j < ncopies; j++)
8498 gimple *new_stmt;
8499 if (j == 0)
8501 if (slp)
8503 /* Get vectorized arguments for SLP_NODE. */
8504 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8505 op, &vec_oprnds);
8506 vec_oprnd = vec_oprnds[0];
8508 else
8510 /* For interleaved stores we collect vectorized defs for all the
8511 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8512 used as an input to vect_permute_store_chain().
8514 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8515 and OPRNDS are of size 1. */
8516 stmt_vec_info next_stmt_info = first_stmt_info;
8517 for (i = 0; i < group_size; i++)
8519 /* Since gaps are not supported for interleaved stores,
8520 DR_GROUP_SIZE is the exact number of stmts in the chain.
8521 Therefore, NEXT_STMT_INFO can't be NULL. If there is
8522 no interleaving, DR_GROUP_SIZE is 1,
8523 and only one iteration of the loop will be executed. */
8524 op = vect_get_store_rhs (next_stmt_info);
8525 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8526 ncopies, op, &gvec_oprnds[i]);
8527 vec_oprnd = gvec_oprnds[i][0];
8528 dr_chain.quick_push (gvec_oprnds[i][0]);
8529 oprnds.quick_push (gvec_oprnds[i][0]);
8530 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8532 if (mask)
8534 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8535 mask, &vec_masks, mask_vectype);
8536 vec_mask = vec_masks[0];
8540 /* We should have caught mismatched types earlier. */
8541 gcc_assert (useless_type_conversion_p (vectype,
8542 TREE_TYPE (vec_oprnd)));
8543 bool simd_lane_access_p
8544 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8545 if (simd_lane_access_p
8546 && !loop_masks
8547 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8548 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8549 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8550 && integer_zerop (DR_INIT (first_dr_info->dr))
8551 && alias_sets_conflict_p (get_alias_set (aggr_type),
8552 get_alias_set (TREE_TYPE (ref_type))))
8554 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8555 dataref_offset = build_int_cst (ref_type, 0);
8557 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8558 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8559 slp_node, &gs_info, &dataref_ptr,
8560 &vec_offsets);
8561 else
8562 dataref_ptr
8563 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8564 simd_lane_access_p ? loop : NULL,
8565 offset, &dummy, gsi, &ptr_incr,
8566 simd_lane_access_p, bump);
8568 else
8570 /* For interleaved stores we created vectorized defs for all the
8571 defs stored in OPRNDS in the previous iteration (previous copy).
8572 DR_CHAIN is then used as an input to vect_permute_store_chain().
8573 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8574 OPRNDS are of size 1. */
8575 for (i = 0; i < group_size; i++)
8577 vec_oprnd = gvec_oprnds[i][j];
8578 dr_chain[i] = gvec_oprnds[i][j];
8579 oprnds[i] = gvec_oprnds[i][j];
8581 if (mask)
8582 vec_mask = vec_masks[j];
8583 if (dataref_offset)
8584 dataref_offset
8585 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8586 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8587 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8588 stmt_info, bump);
8591 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8593 tree vec_array;
8595 /* Get an array into which we can store the individual vectors. */
8596 vec_array = create_vector_array (vectype, vec_num);
8598 /* Invalidate the current contents of VEC_ARRAY. This should
8599 become an RTL clobber too, which prevents the vector registers
8600 from being upward-exposed. */
8601 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8603 /* Store the individual vectors into the array. */
8604 for (i = 0; i < vec_num; i++)
8606 vec_oprnd = dr_chain[i];
8607 write_vector_array (vinfo, stmt_info,
8608 gsi, vec_oprnd, vec_array, i);
8611 tree final_mask = NULL;
8612 if (loop_masks)
8613 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8614 vectype, j);
8615 if (vec_mask)
8616 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8617 final_mask, vec_mask, gsi);
8619 gcall *call;
8620 if (final_mask)
8622 /* Emit:
8623 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8624 VEC_ARRAY). */
8625 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8626 tree alias_ptr = build_int_cst (ref_type, align);
8627 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8628 dataref_ptr, alias_ptr,
8629 final_mask, vec_array);
8631 else
8633 /* Emit:
8634 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8635 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8636 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8637 vec_array);
8638 gimple_call_set_lhs (call, data_ref);
8640 gimple_call_set_nothrow (call, true);
8641 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8642 new_stmt = call;
8644 /* Record that VEC_ARRAY is now dead. */
8645 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8647 else
8649 new_stmt = NULL;
8650 if (grouped_store)
8652 if (j == 0)
8653 result_chain.create (group_size);
8654 /* Permute. */
8655 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8656 gsi, &result_chain);
8659 stmt_vec_info next_stmt_info = first_stmt_info;
8660 for (i = 0; i < vec_num; i++)
8662 unsigned misalign;
8663 unsigned HOST_WIDE_INT align;
8665 tree final_mask = NULL_TREE;
8666 if (loop_masks)
8667 final_mask = vect_get_loop_mask (gsi, loop_masks,
8668 vec_num * ncopies,
8669 vectype, vec_num * j + i);
8670 if (vec_mask)
8671 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8672 final_mask, vec_mask, gsi);
8674 if (memory_access_type == VMAT_GATHER_SCATTER
8675 && gs_info.ifn != IFN_LAST)
8677 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8678 vec_offset = vec_offsets[vec_num * j + i];
8679 tree scale = size_int (gs_info.scale);
8680 gcall *call;
8681 if (final_mask)
8682 call = gimple_build_call_internal
8683 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8684 scale, vec_oprnd, final_mask);
8685 else
8686 call = gimple_build_call_internal
8687 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8688 scale, vec_oprnd);
8689 gimple_call_set_nothrow (call, true);
8690 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8691 new_stmt = call;
8692 break;
8694 else if (memory_access_type == VMAT_GATHER_SCATTER)
8696 /* Emulated scatter. */
8697 gcc_assert (!final_mask);
8698 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
8699 unsigned HOST_WIDE_INT const_offset_nunits
8700 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
8701 .to_constant ();
8702 vec<constructor_elt, va_gc> *ctor_elts;
8703 vec_alloc (ctor_elts, const_nunits);
8704 gimple_seq stmts = NULL;
8705 tree elt_type = TREE_TYPE (vectype);
8706 unsigned HOST_WIDE_INT elt_size
8707 = tree_to_uhwi (TYPE_SIZE (elt_type));
8708 /* We support offset vectors with more elements
8709 than the data vector for now. */
8710 unsigned HOST_WIDE_INT factor
8711 = const_offset_nunits / const_nunits;
8712 vec_offset = vec_offsets[j / factor];
8713 unsigned elt_offset = (j % factor) * const_nunits;
8714 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
8715 tree scale = size_int (gs_info.scale);
8716 align = get_object_alignment (DR_REF (first_dr_info->dr));
8717 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
8718 for (unsigned k = 0; k < const_nunits; ++k)
8720 /* Compute the offsetted pointer. */
8721 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
8722 bitsize_int (k + elt_offset));
8723 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
8724 idx_type, vec_offset,
8725 TYPE_SIZE (idx_type), boff);
8726 idx = gimple_convert (&stmts, sizetype, idx);
8727 idx = gimple_build (&stmts, MULT_EXPR,
8728 sizetype, idx, scale);
8729 tree ptr = gimple_build (&stmts, PLUS_EXPR,
8730 TREE_TYPE (dataref_ptr),
8731 dataref_ptr, idx);
8732 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
8733 /* Extract the element to be stored. */
8734 tree elt = gimple_build (&stmts, BIT_FIELD_REF,
8735 TREE_TYPE (vectype), vec_oprnd,
8736 TYPE_SIZE (elt_type),
8737 bitsize_int (k * elt_size));
8738 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8739 stmts = NULL;
8740 tree ref = build2 (MEM_REF, ltype, ptr,
8741 build_int_cst (ref_type, 0));
8742 new_stmt = gimple_build_assign (ref, elt);
8743 vect_finish_stmt_generation (vinfo, stmt_info,
8744 new_stmt, gsi);
8746 break;
8749 if (i > 0)
8750 /* Bump the vector pointer. */
8751 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8752 gsi, stmt_info, bump);
8754 if (slp)
8755 vec_oprnd = vec_oprnds[i];
8756 else if (grouped_store)
8757 /* For grouped stores vectorized defs are interleaved in
8758 vect_permute_store_chain(). */
8759 vec_oprnd = result_chain[i];
8761 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8762 if (alignment_support_scheme == dr_aligned)
8763 misalign = 0;
8764 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8766 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8767 misalign = 0;
8769 else
8770 misalign = misalignment;
8771 if (dataref_offset == NULL_TREE
8772 && TREE_CODE (dataref_ptr) == SSA_NAME)
8773 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8774 misalign);
8775 align = least_bit_hwi (misalign | align);
8777 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8779 tree perm_mask = perm_mask_for_reverse (vectype);
8780 tree perm_dest = vect_create_destination_var
8781 (vect_get_store_rhs (stmt_info), vectype);
8782 tree new_temp = make_ssa_name (perm_dest);
8784 /* Generate the permute statement. */
8785 gimple *perm_stmt
8786 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8787 vec_oprnd, perm_mask);
8788 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8790 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8791 vec_oprnd = new_temp;
8794 /* Arguments are ready. Create the new vector stmt. */
8795 if (final_mask)
8797 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8798 gcall *call
8799 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8800 dataref_ptr, ptr,
8801 final_mask, vec_oprnd);
8802 gimple_call_set_nothrow (call, true);
8803 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8804 new_stmt = call;
8806 else if (loop_lens)
8808 machine_mode vmode = TYPE_MODE (vectype);
8809 opt_machine_mode new_ovmode
8810 = get_len_load_store_mode (vmode, false);
8811 machine_mode new_vmode = new_ovmode.require ();
8812 unsigned factor
8813 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
8814 tree final_len
8815 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8816 vec_num * ncopies, vectype,
8817 vec_num * j + i, factor);
8818 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8819 /* Need conversion if it's wrapped with VnQI. */
8820 if (vmode != new_vmode)
8822 tree new_vtype
8823 = build_vector_type_for_mode (unsigned_intQI_type_node,
8824 new_vmode);
8825 tree var
8826 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8827 vec_oprnd
8828 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8829 gassign *new_stmt
8830 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8831 vec_oprnd);
8832 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8833 gsi);
8834 vec_oprnd = var;
8837 signed char biasval =
8838 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8840 tree bias = build_int_cst (intQI_type_node, biasval);
8841 gcall *call
8842 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8843 ptr, final_len, vec_oprnd,
8844 bias);
8845 gimple_call_set_nothrow (call, true);
8846 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8847 new_stmt = call;
8849 else
8851 data_ref = fold_build2 (MEM_REF, vectype,
8852 dataref_ptr,
8853 dataref_offset
8854 ? dataref_offset
8855 : build_int_cst (ref_type, 0));
8856 if (alignment_support_scheme == dr_aligned)
8857 ;
8858 else
8859 TREE_TYPE (data_ref)
8860 = build_aligned_type (TREE_TYPE (data_ref),
8861 align * BITS_PER_UNIT);
8862 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8863 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8864 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8867 if (slp)
8868 continue;
8870 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8871 if (!next_stmt_info)
8872 break;
8875 if (!slp)
8877 if (j == 0)
8878 *vec_stmt = new_stmt;
8879 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8883 for (i = 0; i < group_size; ++i)
8885 vec<tree> oprndsi = gvec_oprnds[i];
8886 oprndsi.release ();
8888 oprnds.release ();
8889 result_chain.release ();
8890 vec_oprnds.release ();
8892 return true;
8895 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8896 VECTOR_CST mask. No checks are made that the target platform supports the
8897 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8898 vect_gen_perm_mask_checked. */
8900 tree
8901 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8903 tree mask_type;
8905 poly_uint64 nunits = sel.length ();
8906 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8908 mask_type = build_vector_type (ssizetype, nunits);
8909 return vec_perm_indices_to_tree (mask_type, sel);
8912 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8913 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8915 tree
8916 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8918 machine_mode vmode = TYPE_MODE (vectype);
8919 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8920 return vect_gen_perm_mask_any (vectype, sel);
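/* A typical use (sketch only, mirroring perm_mask_for_reverse in this
   file) builds the indices with a vec_perm_builder and then requests
   the checked mask:

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   Callers normally verify can_vec_perm_const_p first, as the assert
   above requires.  The resulting VECTOR_CST selects the elements in
   reverse order and can be used as the third operand of a
   VEC_PERM_EXPR.  */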
8923 /* Given a vector variable X and Y, that was generated for the scalar
8924 STMT_INFO, generate instructions to permute the vector elements of X and Y
8925 using permutation mask MASK_VEC, insert them at *GSI and return the
8926 permuted vector variable. */
8928 static tree
8929 permute_vec_elements (vec_info *vinfo,
8930 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8931 gimple_stmt_iterator *gsi)
8933 tree vectype = TREE_TYPE (x);
8934 tree perm_dest, data_ref;
8935 gimple *perm_stmt;
8937 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8938 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8939 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8940 else
8941 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8942 data_ref = make_ssa_name (perm_dest);
8944 /* Generate the permute statement. */
8945 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8946 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8948 return data_ref;
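/* For instance, the VMAT_CONTIGUOUS_REVERSE handling in
   vectorizable_load below calls this with X == Y and the mask from
   perm_mask_for_reverse, which for a 4-element vector shows up in
   dumps roughly as

     vect_perm.7 = VEC_PERM_EXPR <vect__1.6, vect__1.6, { 3, 2, 1, 0 }>;

   (illustrative SSA names).  */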
8951 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8952 inserting them on the loop's preheader edge. Returns true if we
8953 were successful in doing so (and thus STMT_INFO can be moved then),
8954 otherwise returns false. */
8956 static bool
8957 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8959 ssa_op_iter i;
8960 tree op;
8961 bool any = false;
8963 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8965 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8966 if (!gimple_nop_p (def_stmt)
8967 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8969 /* Make sure we don't need to recurse. While we could do
8970 so in simple cases, when there are more complex use webs
8971 we don't have an easy way to preserve stmt order to fulfil
8972 dependencies within them. */
8973 tree op2;
8974 ssa_op_iter i2;
8975 if (gimple_code (def_stmt) == GIMPLE_PHI)
8976 return false;
8977 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8979 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8980 if (!gimple_nop_p (def_stmt2)
8981 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8982 return false;
8984 any = true;
8988 if (!any)
8989 return true;
8991 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8993 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8994 if (!gimple_nop_p (def_stmt)
8995 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8997 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8998 gsi_remove (&gsi, false);
8999 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9003 return true;
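/* As an illustrative example, for a load whose address is computed
   inside the loop from loop-invariant values only,

     _1 = n_5(D) + 4;
     x_2 = a[_1];          <-- STMT_INFO

   the definition of _1 is moved to the loop preheader so that the
   load itself can be hoisted there as well (see the VMAT_INVARIANT
   handling in vectorizable_load below).  */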
9006 /* vectorizable_load.
9008 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9009 that can be vectorized.
9010 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9011 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9012 Return true if STMT_INFO is vectorizable in this way. */
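/* For a simple contiguous case such as

     x_1 = a[i_2];

   vectorized with a 4 x int vector type, the transformation below
   emits roughly

     vect_x_1.3 = MEM <vector(4) int> [(int *)vectp_a.2];

   (illustrative dump syntax) and records the new statement in
   STMT_VINFO_VEC_STMTS, or in the SLP node for SLP vectorization.  */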
9014 static bool
9015 vectorizable_load (vec_info *vinfo,
9016 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9017 gimple **vec_stmt, slp_tree slp_node,
9018 stmt_vector_for_cost *cost_vec)
9020 tree scalar_dest;
9021 tree vec_dest = NULL;
9022 tree data_ref = NULL;
9023 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9024 class loop *loop = NULL;
9025 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9026 bool nested_in_vect_loop = false;
9027 tree elem_type;
9028 tree new_temp;
9029 machine_mode mode;
9030 tree dummy;
9031 tree dataref_ptr = NULL_TREE;
9032 tree dataref_offset = NULL_TREE;
9033 gimple *ptr_incr = NULL;
9034 int ncopies;
9035 int i, j;
9036 unsigned int group_size;
9037 poly_uint64 group_gap_adj;
9038 tree msq = NULL_TREE, lsq;
9039 tree realignment_token = NULL_TREE;
9040 gphi *phi = NULL;
9041 vec<tree> dr_chain = vNULL;
9042 bool grouped_load = false;
9043 stmt_vec_info first_stmt_info;
9044 stmt_vec_info first_stmt_info_for_drptr = NULL;
9045 bool compute_in_loop = false;
9046 class loop *at_loop;
9047 int vec_num;
9048 bool slp = (slp_node != NULL);
9049 bool slp_perm = false;
9050 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9051 poly_uint64 vf;
9052 tree aggr_type;
9053 gather_scatter_info gs_info;
9054 tree ref_type;
9055 enum vect_def_type mask_dt = vect_unknown_def_type;
9057 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9058 return false;
9060 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9061 && ! vec_stmt)
9062 return false;
9064 if (!STMT_VINFO_DATA_REF (stmt_info))
9065 return false;
9067 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9068 int mask_index = -1;
9069 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9071 scalar_dest = gimple_assign_lhs (assign);
9072 if (TREE_CODE (scalar_dest) != SSA_NAME)
9073 return false;
9075 tree_code code = gimple_assign_rhs_code (assign);
9076 if (code != ARRAY_REF
9077 && code != BIT_FIELD_REF
9078 && code != INDIRECT_REF
9079 && code != COMPONENT_REF
9080 && code != IMAGPART_EXPR
9081 && code != REALPART_EXPR
9082 && code != MEM_REF
9083 && TREE_CODE_CLASS (code) != tcc_declaration)
9084 return false;
9086 else
9088 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9089 if (!call || !gimple_call_internal_p (call))
9090 return false;
9092 internal_fn ifn = gimple_call_internal_fn (call);
9093 if (!internal_load_fn_p (ifn))
9094 return false;
9096 scalar_dest = gimple_call_lhs (call);
9097 if (!scalar_dest)
9098 return false;
9100 mask_index = internal_fn_mask_index (ifn);
9101 /* ??? For SLP the mask operand is always last. */
9102 if (mask_index >= 0 && slp_node)
9103 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
9104 if (mask_index >= 0
9105 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9106 &mask, NULL, &mask_dt, &mask_vectype))
9107 return false;
9110 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9111 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9113 if (loop_vinfo)
9115 loop = LOOP_VINFO_LOOP (loop_vinfo);
9116 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9117 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9119 else
9120 vf = 1;
9122 /* Multiple types in SLP are handled by creating the appropriate number of
9123 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9124 case of SLP. */
9125 if (slp)
9126 ncopies = 1;
9127 else
9128 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9130 gcc_assert (ncopies >= 1);
9132 /* FORNOW. This restriction should be relaxed. */
9133 if (nested_in_vect_loop && ncopies > 1)
9135 if (dump_enabled_p ())
9136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9137 "multiple types in nested loop.\n");
9138 return false;
9141 /* Invalidate assumptions made by dependence analysis when vectorization
9142 on the unrolled body effectively re-orders stmts. */
9143 if (ncopies > 1
9144 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9145 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9146 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9148 if (dump_enabled_p ())
9149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9150 "cannot perform implicit CSE when unrolling "
9151 "with negative dependence distance\n");
9152 return false;
9155 elem_type = TREE_TYPE (vectype);
9156 mode = TYPE_MODE (vectype);
9158 /* FORNOW. In some cases can vectorize even if data-type not supported
9159 (e.g. - data copies). */
9160 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9162 if (dump_enabled_p ())
9163 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9164 "Aligned load, but unsupported type.\n");
9165 return false;
9168 /* Check if the load is a part of an interleaving chain. */
9169 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9171 grouped_load = true;
9172 /* FORNOW */
9173 gcc_assert (!nested_in_vect_loop);
9174 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9176 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9177 group_size = DR_GROUP_SIZE (first_stmt_info);
9179 /* Refuse non-SLP vectorization of SLP-only groups. */
9180 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9182 if (dump_enabled_p ())
9183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9184 "cannot vectorize load in non-SLP mode.\n");
9185 return false;
9188 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9190 slp_perm = true;
9192 if (!loop_vinfo)
9194 /* In BB vectorization we may not actually use a loaded vector
9195 accessing elements in excess of DR_GROUP_SIZE. */
9196 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9197 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9198 unsigned HOST_WIDE_INT nunits;
9199 unsigned j, k, maxk = 0;
9200 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9201 if (k > maxk)
9202 maxk = k;
9203 tree vectype = SLP_TREE_VECTYPE (slp_node);
9204 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9205 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9207 if (dump_enabled_p ())
9208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9209 "BB vectorization with gaps at the end of "
9210 "a load is not supported\n");
9211 return false;
9215 auto_vec<tree> tem;
9216 unsigned n_perms;
9217 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9218 true, &n_perms))
9220 if (dump_enabled_p ())
9221 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9222 vect_location,
9223 "unsupported load permutation\n");
9224 return false;
9228 /* Invalidate assumptions made by dependence analysis when vectorization
9229 on the unrolled body effectively re-orders stmts. */
9230 if (!PURE_SLP_STMT (stmt_info)
9231 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9232 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9233 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9235 if (dump_enabled_p ())
9236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9237 "cannot perform implicit CSE when performing "
9238 "group loads with negative dependence distance\n");
9239 return false;
9242 else
9243 group_size = 1;
9245 vect_memory_access_type memory_access_type;
9246 enum dr_alignment_support alignment_support_scheme;
9247 int misalignment;
9248 poly_int64 poffset;
9249 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9250 ncopies, &memory_access_type, &poffset,
9251 &alignment_support_scheme, &misalignment, &gs_info))
9252 return false;
9254 if (mask)
9256 if (memory_access_type == VMAT_CONTIGUOUS)
9258 machine_mode vec_mode = TYPE_MODE (vectype);
9259 if (!VECTOR_MODE_P (vec_mode)
9260 || !can_vec_mask_load_store_p (vec_mode,
9261 TYPE_MODE (mask_vectype), true))
9262 return false;
9264 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9265 && memory_access_type != VMAT_GATHER_SCATTER)
9267 if (dump_enabled_p ())
9268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9269 "unsupported access type for masked load.\n");
9270 return false;
9272 else if (memory_access_type == VMAT_GATHER_SCATTER
9273 && gs_info.ifn == IFN_LAST
9274 && !gs_info.decl)
9276 if (dump_enabled_p ())
9277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9278 "unsupported masked emulated gather.\n");
9279 return false;
9283 if (!vec_stmt) /* transformation not required. */
9285 if (slp_node
9286 && mask
9287 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9288 mask_vectype))
9290 if (dump_enabled_p ())
9291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9292 "incompatible vector types for invariants\n");
9293 return false;
9296 if (!slp)
9297 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9299 if (loop_vinfo
9300 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9301 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9302 VLS_LOAD, group_size,
9303 memory_access_type, &gs_info,
9304 mask);
9306 if (dump_enabled_p ()
9307 && memory_access_type != VMAT_ELEMENTWISE
9308 && memory_access_type != VMAT_GATHER_SCATTER
9309 && alignment_support_scheme != dr_aligned)
9310 dump_printf_loc (MSG_NOTE, vect_location,
9311 "Vectorizing an unaligned access.\n");
9313 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9314 vinfo->any_known_not_updated_vssa = true;
9316 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9317 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9318 alignment_support_scheme, misalignment,
9319 &gs_info, slp_node, cost_vec);
9320 return true;
9323 if (!slp)
9324 gcc_assert (memory_access_type
9325 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9327 if (dump_enabled_p ())
9328 dump_printf_loc (MSG_NOTE, vect_location,
9329 "transform load. ncopies = %d\n", ncopies);
9331 /* Transform. */
9333 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9334 ensure_base_align (dr_info);
9336 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9338 vect_build_gather_load_calls (vinfo,
9339 stmt_info, gsi, vec_stmt, &gs_info, mask);
9340 return true;
9343 if (memory_access_type == VMAT_INVARIANT)
9345 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9346 /* If we have versioned for aliasing or the loop doesn't
9347 have any data dependencies that would preclude this,
9348 then we are sure this is a loop invariant load and
9349 thus we can insert it on the preheader edge. */
9350 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9351 && !nested_in_vect_loop
9352 && hoist_defs_of_uses (stmt_info, loop));
9353 if (hoist_p)
9355 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9356 if (dump_enabled_p ())
9357 dump_printf_loc (MSG_NOTE, vect_location,
9358 "hoisting out of the vectorized loop: %G",
9359 (gimple *) stmt);
9360 scalar_dest = copy_ssa_name (scalar_dest);
9361 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9362 edge pe = loop_preheader_edge (loop);
9363 gphi *vphi = get_virtual_phi (loop->header);
9364 tree vuse;
9365 if (vphi)
9366 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9367 else
9368 vuse = gimple_vuse (gsi_stmt (*gsi));
9369 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9370 gimple_set_vuse (new_stmt, vuse);
9371 gsi_insert_on_edge_immediate (pe, new_stmt);
9373 /* These copies are all equivalent, but currently the representation
9374 requires a separate STMT_VINFO_VEC_STMT for each one. */
9375 gimple_stmt_iterator gsi2 = *gsi;
9376 gsi_next (&gsi2);
9377 for (j = 0; j < ncopies; j++)
9379 if (hoist_p)
9380 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9381 vectype, NULL);
9382 else
9383 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9384 vectype, &gsi2);
9385 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9386 if (slp)
9387 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9388 else
9390 if (j == 0)
9391 *vec_stmt = new_stmt;
9392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9395 return true;
9398 if (memory_access_type == VMAT_ELEMENTWISE
9399 || memory_access_type == VMAT_STRIDED_SLP)
9401 gimple_stmt_iterator incr_gsi;
9402 bool insert_after;
9403 tree offvar;
9404 tree ivstep;
9405 tree running_off;
9406 vec<constructor_elt, va_gc> *v = NULL;
9407 tree stride_base, stride_step, alias_off;
9408 /* Checked by get_load_store_type. */
9409 unsigned int const_nunits = nunits.to_constant ();
9410 unsigned HOST_WIDE_INT cst_offset = 0;
9411 tree dr_offset;
9413 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9414 gcc_assert (!nested_in_vect_loop);
9416 if (grouped_load)
9418 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9419 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9421 else
9423 first_stmt_info = stmt_info;
9424 first_dr_info = dr_info;
9426 if (slp && grouped_load)
9428 group_size = DR_GROUP_SIZE (first_stmt_info);
9429 ref_type = get_group_alias_ptr_type (first_stmt_info);
9431 else
9433 if (grouped_load)
9434 cst_offset
9435 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9436 * vect_get_place_in_interleaving_chain (stmt_info,
9437 first_stmt_info));
9438 group_size = 1;
9439 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9442 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9443 stride_base
9444 = fold_build_pointer_plus
9445 (DR_BASE_ADDRESS (first_dr_info->dr),
9446 size_binop (PLUS_EXPR,
9447 convert_to_ptrofftype (dr_offset),
9448 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9449 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9451 /* For a load with loop-invariant (but other than power-of-2)
9452 stride (i.e. not a grouped access) like so:
9454 for (i = 0; i < n; i += stride)
9455 ... = array[i];
9457 we generate a new induction variable and new accesses to
9458 form a new vector (or vectors, depending on ncopies):
9460 for (j = 0; ; j += VF*stride)
9461 tmp1 = array[j];
9462 tmp2 = array[j + stride];
9464 vectemp = {tmp1, tmp2, ...}
9467 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9468 build_int_cst (TREE_TYPE (stride_step), vf));
9470 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9472 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9473 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9474 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9475 loop, &incr_gsi, insert_after,
9476 &offvar, NULL);
9478 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9480 running_off = offvar;
9481 alias_off = build_int_cst (ref_type, 0);
9482 int nloads = const_nunits;
9483 int lnel = 1;
9484 tree ltype = TREE_TYPE (vectype);
9485 tree lvectype = vectype;
9486 auto_vec<tree> dr_chain;
9487 if (memory_access_type == VMAT_STRIDED_SLP)
9489 if (group_size < const_nunits)
9491 /* First check if vec_init optab supports construction from vector
9492 elts directly. Otherwise avoid emitting a constructor of
9493 vector elements by performing the loads using an integer type
9494 of the same size, constructing a vector of those and then
9495 re-interpreting it as the original vector type. This avoids a
9496 huge runtime penalty due to the general inability to perform
9497 store forwarding from smaller stores to a larger load. */
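/* E.g. (illustrative) with a 4 x int vectype and group_size 2,
   vector_vector_composition_type returns a two-element vector type
   whose piece type ptype is either a 2 x int sub-vector or a 64-bit
   integer; the loads below then read ptype-sized chunks and the
   result is view-converted back to vectype.  */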
9498 tree ptype;
9499 tree vtype
9500 = vector_vector_composition_type (vectype,
9501 const_nunits / group_size,
9502 &ptype);
9503 if (vtype != NULL_TREE)
9505 nloads = const_nunits / group_size;
9506 lnel = group_size;
9507 lvectype = vtype;
9508 ltype = ptype;
9511 else
9513 nloads = 1;
9514 lnel = const_nunits;
9515 ltype = vectype;
9517 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9519 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9520 else if (nloads == 1)
9521 ltype = vectype;
9523 if (slp)
9525 /* For SLP permutation support we need to load the whole group,
9526 not only the number of vector stmts the permutation result
9527 fits in. */
9528 if (slp_perm)
9530 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9531 variable VF. */
9532 unsigned int const_vf = vf.to_constant ();
9533 ncopies = CEIL (group_size * const_vf, const_nunits);
9534 dr_chain.create (ncopies);
9536 else
9537 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9539 unsigned int group_el = 0;
9540 unsigned HOST_WIDE_INT
9541 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9542 unsigned int n_groups = 0;
9543 for (j = 0; j < ncopies; j++)
9545 if (nloads > 1)
9546 vec_alloc (v, nloads);
9547 gimple *new_stmt = NULL;
9548 for (i = 0; i < nloads; i++)
9550 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9551 group_el * elsz + cst_offset);
9552 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9553 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9554 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9555 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9556 if (nloads > 1)
9557 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9558 gimple_assign_lhs (new_stmt));
9560 group_el += lnel;
9561 if (! slp
9562 || group_el == group_size)
9564 n_groups++;
9565 /* When doing SLP make sure to not load elements from
9566 the next vector iteration, those will not be accessed
9567 so just use the last element again. See PR107451. */
9568 if (!slp || known_lt (n_groups, vf))
9570 tree newoff = copy_ssa_name (running_off);
9571 gimple *incr
9572 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9573 running_off, stride_step);
9574 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9575 running_off = newoff;
9577 group_el = 0;
9580 if (nloads > 1)
9582 tree vec_inv = build_constructor (lvectype, v);
9583 new_temp = vect_init_vector (vinfo, stmt_info,
9584 vec_inv, lvectype, gsi);
9585 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9586 if (lvectype != vectype)
9588 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9589 VIEW_CONVERT_EXPR,
9590 build1 (VIEW_CONVERT_EXPR,
9591 vectype, new_temp));
9592 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9596 if (slp)
9598 if (slp_perm)
9599 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9600 else
9601 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9603 else
9605 if (j == 0)
9606 *vec_stmt = new_stmt;
9607 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9610 if (slp_perm)
9612 unsigned n_perms;
9613 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9614 false, &n_perms);
9616 return true;
9619 if (memory_access_type == VMAT_GATHER_SCATTER
9620 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9621 grouped_load = false;
9623 if (grouped_load)
9625 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9626 group_size = DR_GROUP_SIZE (first_stmt_info);
9627 /* For SLP vectorization we directly vectorize a subchain
9628 without permutation. */
9629 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9630 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9631 /* For BB vectorization always use the first stmt to base
9632 the data ref pointer on. */
9633 if (bb_vinfo)
9634 first_stmt_info_for_drptr
9635 = vect_find_first_scalar_stmt_in_slp (slp_node);
9637 /* Check if the chain of loads is already vectorized. */
9638 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9639 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9640 ??? But we can only do so if there is exactly one
9641 as we have no way to get at the rest. Leave the CSE
9642 opportunity alone.
9643 ??? With the group load eventually participating
9644 in multiple different permutations (having multiple
9645 slp nodes which refer to the same group) the CSE
9646 is even wrong code. See PR56270. */
9647 && !slp)
9649 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9650 return true;
9652 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9653 group_gap_adj = 0;
9655 /* VEC_NUM is the number of vect stmts to be created for this group. */
9656 if (slp)
9658 grouped_load = false;
9659 /* If an SLP permutation is from N elements to N elements,
9660 and if one vector holds a whole number of N, we can load
9661 the inputs to the permutation in the same way as an
9662 unpermuted sequence. In other cases we need to load the
9663 whole group, not only the number of vector stmts the
9664 permutation result fits in. */
9665 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9666 if (slp_perm
9667 && (group_size != scalar_lanes
9668 || !multiple_p (nunits, group_size)))
9670 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9671 variable VF; see vect_transform_slp_perm_load. */
9672 unsigned int const_vf = vf.to_constant ();
9673 unsigned int const_nunits = nunits.to_constant ();
9674 vec_num = CEIL (group_size * const_vf, const_nunits);
9675 group_gap_adj = vf * group_size - nunits * vec_num;
9677 else
9679 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9680 group_gap_adj
9681 = group_size - scalar_lanes;
9684 else
9685 vec_num = group_size;
9687 ref_type = get_group_alias_ptr_type (first_stmt_info);
9689 else
9691 first_stmt_info = stmt_info;
9692 first_dr_info = dr_info;
9693 group_size = vec_num = 1;
9694 group_gap_adj = 0;
9695 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9696 if (slp)
9697 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9700 gcc_assert (alignment_support_scheme);
9701 vec_loop_masks *loop_masks
9702 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9703 ? &LOOP_VINFO_MASKS (loop_vinfo)
9704 : NULL);
9705 vec_loop_lens *loop_lens
9706 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9707 ? &LOOP_VINFO_LENS (loop_vinfo)
9708 : NULL);
9710 /* Shouldn't go with length-based approach if fully masked. */
9711 gcc_assert (!loop_lens || !loop_masks);
9713 /* Targets with store-lane instructions must not require explicit
9714 realignment. vect_supportable_dr_alignment always returns either
9715 dr_aligned or dr_unaligned_supported for masked operations. */
9716 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9717 && !mask
9718 && !loop_masks)
9719 || alignment_support_scheme == dr_aligned
9720 || alignment_support_scheme == dr_unaligned_supported);
9722 /* In case the vectorization factor (VF) is bigger than the number
9723 of elements that we can fit in a vectype (nunits), we have to generate
9724 more than one vector stmt - i.e - we need to "unroll" the
9725 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9726 from one copy of the vector stmt to the next, in the field
9727 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9728 stages to find the correct vector defs to be used when vectorizing
9729 stmts that use the defs of the current stmt. The example below
9730 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9731 need to create 4 vectorized stmts):
9733 before vectorization:
9734 RELATED_STMT VEC_STMT
9735 S1: x = memref - -
9736 S2: z = x + 1 - -
9738 step 1: vectorize stmt S1:
9739 We first create the vector stmt VS1_0, and, as usual, record a
9740 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9741 Next, we create the vector stmt VS1_1, and record a pointer to
9742 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9743 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9744 stmts and pointers:
9745 RELATED_STMT VEC_STMT
9746 VS1_0: vx0 = memref0 VS1_1 -
9747 VS1_1: vx1 = memref1 VS1_2 -
9748 VS1_2: vx2 = memref2 VS1_3 -
9749 VS1_3: vx3 = memref3 - -
9750 S1: x = load - VS1_0
9751 S2: z = x + 1 - -
9754 /* In case of interleaving (non-unit grouped access):
9756 S1: x2 = &base + 2
9757 S2: x0 = &base
9758 S3: x1 = &base + 1
9759 S4: x3 = &base + 3
9761 Vectorized loads are created in the order of memory accesses
9762 starting from the access of the first stmt of the chain:
9764 VS1: vx0 = &base
9765 VS2: vx1 = &base + vec_size*1
9766 VS3: vx3 = &base + vec_size*2
9767 VS4: vx4 = &base + vec_size*3
9769 Then permutation statements are generated:
9771 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9772 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9775 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9776 (the order of the data-refs in the output of vect_permute_load_chain
9777 corresponds to the order of scalar stmts in the interleaving chain - see
9778 the documentation of vect_permute_load_chain()).
9779 The generation of permutation stmts and recording them in
9780 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9782 In case of both multiple types and interleaving, the vector loads and
9783 permutation stmts above are created for every copy. The result vector
9784 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9785 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9787 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9788 on a target that supports unaligned accesses (dr_unaligned_supported)
9789 we generate the following code:
9790 p = initial_addr;
9791 indx = 0;
9792 loop {
9793 p = p + indx * vectype_size;
9794 vec_dest = *(p);
9795 indx = indx + 1;
9798 Otherwise, the data reference is potentially unaligned on a target that
9799 does not support unaligned accesses (dr_explicit_realign_optimized) -
9800 then generate the following code, in which the data in each iteration is
9801 obtained by two vector loads, one from the previous iteration, and one
9802 from the current iteration:
9803 p1 = initial_addr;
9804 msq_init = *(floor(p1))
9805 p2 = initial_addr + VS - 1;
9806 realignment_token = call target_builtin;
9807 indx = 0;
9808 loop {
9809 p2 = p2 + indx * vectype_size
9810 lsq = *(floor(p2))
9811 vec_dest = realign_load (msq, lsq, realignment_token)
9812 indx = indx + 1;
9813 msq = lsq;
9814 } */
9816 /* If the misalignment remains the same throughout the execution of the
9817 loop, we can create the init_addr and permutation mask at the loop
9818 preheader. Otherwise, it needs to be created inside the loop.
9819 This can only occur when vectorizing memory accesses in the inner-loop
9820 nested within an outer-loop that is being vectorized. */
9822 if (nested_in_vect_loop
9823 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9824 GET_MODE_SIZE (TYPE_MODE (vectype))))
9826 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9827 compute_in_loop = true;
9830 bool diff_first_stmt_info
9831 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9833 tree offset = NULL_TREE;
9834 if ((alignment_support_scheme == dr_explicit_realign_optimized
9835 || alignment_support_scheme == dr_explicit_realign)
9836 && !compute_in_loop)
9838 /* If we have different first_stmt_info, we can't set up realignment
9839 here, since we can't guarantee first_stmt_info DR has been
9840 initialized yet; use first_stmt_info_for_drptr DR by bumping the
9841 distance from first_stmt_info DR instead as below. */
9842 if (!diff_first_stmt_info)
9843 msq = vect_setup_realignment (vinfo,
9844 first_stmt_info, gsi, &realignment_token,
9845 alignment_support_scheme, NULL_TREE,
9846 &at_loop);
9847 if (alignment_support_scheme == dr_explicit_realign_optimized)
9849 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9850 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9851 size_one_node);
9852 gcc_assert (!first_stmt_info_for_drptr);
9855 else
9856 at_loop = loop;
9858 if (!known_eq (poffset, 0))
9859 offset = (offset
9860 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9861 : size_int (poffset));
9863 tree bump;
9864 tree vec_offset = NULL_TREE;
9865 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9867 aggr_type = NULL_TREE;
9868 bump = NULL_TREE;
9870 else if (memory_access_type == VMAT_GATHER_SCATTER)
9872 aggr_type = elem_type;
9873 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9874 &bump, &vec_offset);
9876 else
9878 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9879 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9880 else
9881 aggr_type = vectype;
9882 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9883 memory_access_type);
9886 auto_vec<tree> vec_offsets;
9887 auto_vec<tree> vec_masks;
9888 if (mask)
9890 if (slp_node)
9891 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9892 &vec_masks);
9893 else
9894 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9895 &vec_masks, mask_vectype);
9897 tree vec_mask = NULL_TREE;
9898 poly_uint64 group_elt = 0;
9899 for (j = 0; j < ncopies; j++)
9901 /* 1. Create the vector or array pointer update chain. */
9902 if (j == 0)
9904 bool simd_lane_access_p
9905 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9906 if (simd_lane_access_p
9907 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9908 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9909 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9910 && integer_zerop (DR_INIT (first_dr_info->dr))
9911 && alias_sets_conflict_p (get_alias_set (aggr_type),
9912 get_alias_set (TREE_TYPE (ref_type)))
9913 && (alignment_support_scheme == dr_aligned
9914 || alignment_support_scheme == dr_unaligned_supported))
9916 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9917 dataref_offset = build_int_cst (ref_type, 0);
9919 else if (diff_first_stmt_info)
9921 dataref_ptr
9922 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9923 aggr_type, at_loop, offset, &dummy,
9924 gsi, &ptr_incr, simd_lane_access_p,
9925 bump);
9926 /* Adjust the pointer by the difference to first_stmt. */
9927 data_reference_p ptrdr
9928 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9929 tree diff
9930 = fold_convert (sizetype,
9931 size_binop (MINUS_EXPR,
9932 DR_INIT (first_dr_info->dr),
9933 DR_INIT (ptrdr)));
9934 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9935 stmt_info, diff);
9936 if (alignment_support_scheme == dr_explicit_realign)
9938 msq = vect_setup_realignment (vinfo,
9939 first_stmt_info_for_drptr, gsi,
9940 &realignment_token,
9941 alignment_support_scheme,
9942 dataref_ptr, &at_loop);
9943 gcc_assert (!compute_in_loop);
9946 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9948 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9949 slp_node, &gs_info, &dataref_ptr,
9950 &vec_offsets);
9952 else
9953 dataref_ptr
9954 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9955 at_loop,
9956 offset, &dummy, gsi, &ptr_incr,
9957 simd_lane_access_p, bump);
9958 if (mask)
9959 vec_mask = vec_masks[0];
9961 else
9963 if (dataref_offset)
9964 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9965 bump);
9966 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9967 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9968 stmt_info, bump);
9969 if (mask)
9970 vec_mask = vec_masks[j];
9973 if (grouped_load || slp_perm)
9974 dr_chain.create (vec_num);
9976 gimple *new_stmt = NULL;
9977 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9979 tree vec_array;
9981 vec_array = create_vector_array (vectype, vec_num);
9983 tree final_mask = NULL_TREE;
9984 if (loop_masks)
9985 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9986 vectype, j);
9987 if (vec_mask)
9988 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9989 final_mask, vec_mask, gsi);
9991 gcall *call;
9992 if (final_mask)
9994 /* Emit:
9995 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9996 VEC_MASK). */
9997 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9998 tree alias_ptr = build_int_cst (ref_type, align);
9999 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10000 dataref_ptr, alias_ptr,
10001 final_mask);
10003 else
10005 /* Emit:
10006 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10007 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10008 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10010 gimple_call_set_lhs (call, vec_array);
10011 gimple_call_set_nothrow (call, true);
10012 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10013 new_stmt = call;
10015 /* Extract each vector into an SSA_NAME. */
10016 for (i = 0; i < vec_num; i++)
10018 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10019 vec_array, i);
10020 dr_chain.quick_push (new_temp);
10023 /* Record the mapping between SSA_NAMEs and statements. */
10024 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10026 /* Record that VEC_ARRAY is now dead. */
10027 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10029 else
10031 for (i = 0; i < vec_num; i++)
10033 tree final_mask = NULL_TREE;
10034 if (loop_masks
10035 && memory_access_type != VMAT_INVARIANT)
10036 final_mask = vect_get_loop_mask (gsi, loop_masks,
10037 vec_num * ncopies,
10038 vectype, vec_num * j + i);
10039 if (vec_mask)
10040 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10041 final_mask, vec_mask, gsi);
10043 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10044 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10045 gsi, stmt_info, bump);
10047 /* 2. Create the vector-load in the loop. */
10048 switch (alignment_support_scheme)
10050 case dr_aligned:
10051 case dr_unaligned_supported:
10053 unsigned int misalign;
10054 unsigned HOST_WIDE_INT align;
10056 if (memory_access_type == VMAT_GATHER_SCATTER
10057 && gs_info.ifn != IFN_LAST)
10059 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10060 vec_offset = vec_offsets[vec_num * j + i];
10061 tree zero = build_zero_cst (vectype);
10062 tree scale = size_int (gs_info.scale);
10063 gcall *call;
10064 if (final_mask)
10065 call = gimple_build_call_internal
10066 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
10067 vec_offset, scale, zero, final_mask);
10068 else
10069 call = gimple_build_call_internal
10070 (IFN_GATHER_LOAD, 4, dataref_ptr,
10071 vec_offset, scale, zero);
10072 gimple_call_set_nothrow (call, true);
10073 new_stmt = call;
10074 data_ref = NULL_TREE;
10075 break;
10077 else if (memory_access_type == VMAT_GATHER_SCATTER)
10079 /* Emulated gather-scatter. */
10080 gcc_assert (!final_mask);
10081 unsigned HOST_WIDE_INT const_nunits
10082 = nunits.to_constant ();
10083 unsigned HOST_WIDE_INT const_offset_nunits
10084 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10085 .to_constant ();
10086 vec<constructor_elt, va_gc> *ctor_elts;
10087 vec_alloc (ctor_elts, const_nunits);
10088 gimple_seq stmts = NULL;
10089 /* We support offset vectors with more elements
10090 than the data vector for now. */
10091 unsigned HOST_WIDE_INT factor
10092 = const_offset_nunits / const_nunits;
10093 vec_offset = vec_offsets[j / factor];
10094 unsigned elt_offset = (j % factor) * const_nunits;
10095 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10096 tree scale = size_int (gs_info.scale);
10097 align
10098 = get_object_alignment (DR_REF (first_dr_info->dr));
10099 tree ltype = build_aligned_type (TREE_TYPE (vectype),
10100 align);
10101 for (unsigned k = 0; k < const_nunits; ++k)
10103 tree boff = size_binop (MULT_EXPR,
10104 TYPE_SIZE (idx_type),
10105 bitsize_int
10106 (k + elt_offset));
10107 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
10108 idx_type, vec_offset,
10109 TYPE_SIZE (idx_type),
10110 boff);
10111 idx = gimple_convert (&stmts, sizetype, idx);
10112 idx = gimple_build (&stmts, MULT_EXPR,
10113 sizetype, idx, scale);
10114 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10115 TREE_TYPE (dataref_ptr),
10116 dataref_ptr, idx);
10117 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10118 tree elt = make_ssa_name (TREE_TYPE (vectype));
10119 tree ref = build2 (MEM_REF, ltype, ptr,
10120 build_int_cst (ref_type, 0));
10121 new_stmt = gimple_build_assign (elt, ref);
10122 gimple_set_vuse (new_stmt,
10123 gimple_vuse (gsi_stmt (*gsi)));
10124 gimple_seq_add_stmt (&stmts, new_stmt);
10125 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10127 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10128 new_stmt = gimple_build_assign (NULL_TREE,
10129 build_constructor
10130 (vectype, ctor_elts));
10131 data_ref = NULL_TREE;
10132 break;
10135 align =
10136 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10137 if (alignment_support_scheme == dr_aligned)
10138 misalign = 0;
10139 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10141 align = dr_alignment
10142 (vect_dr_behavior (vinfo, first_dr_info));
10143 misalign = 0;
10145 else
10146 misalign = misalignment;
10147 if (dataref_offset == NULL_TREE
10148 && TREE_CODE (dataref_ptr) == SSA_NAME)
10149 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
10150 align, misalign);
10151 align = least_bit_hwi (misalign | align);
10153 if (final_mask)
10155 tree ptr = build_int_cst (ref_type,
10156 align * BITS_PER_UNIT);
10157 gcall *call
10158 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10159 dataref_ptr, ptr,
10160 final_mask);
10161 gimple_call_set_nothrow (call, true);
10162 new_stmt = call;
10163 data_ref = NULL_TREE;
10165 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
10167 machine_mode vmode = TYPE_MODE (vectype);
10168 opt_machine_mode new_ovmode
10169 = get_len_load_store_mode (vmode, true);
10170 machine_mode new_vmode = new_ovmode.require ();
10171 unsigned factor = (new_ovmode == vmode)
10172 ? 1
10173 : GET_MODE_UNIT_SIZE (vmode);
10174 tree final_len
10175 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10176 vec_num * ncopies, vectype,
10177 vec_num * j + i, factor);
10178 tree ptr
10179 = build_int_cst (ref_type, align * BITS_PER_UNIT);
10181 tree qi_type = unsigned_intQI_type_node;
10183 signed char biasval =
10184 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10186 tree bias = build_int_cst (intQI_type_node, biasval);
10188 gcall *call
10189 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10190 dataref_ptr, ptr,
10191 final_len, bias);
10192 gimple_call_set_nothrow (call, true);
10193 new_stmt = call;
10194 data_ref = NULL_TREE;
10196 /* Need conversion if it's wrapped with VnQI. */
10197 if (vmode != new_vmode)
10199 tree new_vtype
10200 = build_vector_type_for_mode (qi_type, new_vmode);
10201 tree var = vect_get_new_ssa_name (new_vtype,
10202 vect_simple_var);
10203 gimple_set_lhs (call, var);
10204 vect_finish_stmt_generation (vinfo, stmt_info, call,
10205 gsi);
10206 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10207 new_stmt
10208 = gimple_build_assign (vec_dest,
10209 VIEW_CONVERT_EXPR, op);
10212 else
10214 tree ltype = vectype;
10215 tree new_vtype = NULL_TREE;
10216 unsigned HOST_WIDE_INT gap
10217 = DR_GROUP_GAP (first_stmt_info);
10218 unsigned int vect_align
10219 = vect_known_alignment_in_bytes (first_dr_info,
10220 vectype);
10221 unsigned int scalar_dr_size
10222 = vect_get_scalar_dr_size (first_dr_info);
10223 /* If there's no peeling for gaps but we have a gap
10224 with slp loads then load the lower half of the
10225 vector only. See get_group_load_store_type for
10226 when we apply this optimization. */
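/* E.g. (illustrative) for a group of 4 ints with a gap of 2 and a
   4 x int vector: only the lower half of the vector is actually
   loaded and the remaining elements are filled with zeros by the
   CONSTRUCTOR built below (in reversed or non-reversed order
   depending on the access).  */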
10227 if (slp
10228 && loop_vinfo
10229 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
10230 && gap != 0
10231 && known_eq (nunits, (group_size - gap) * 2)
10232 && known_eq (nunits, group_size)
10233 && gap >= (vect_align / scalar_dr_size))
10235 tree half_vtype;
10236 new_vtype
10237 = vector_vector_composition_type (vectype, 2,
10238 &half_vtype);
10239 if (new_vtype != NULL_TREE)
10240 ltype = half_vtype;
10242 tree offset
10243 = (dataref_offset ? dataref_offset
10244 : build_int_cst (ref_type, 0));
10245 if (ltype != vectype
10246 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10248 unsigned HOST_WIDE_INT gap_offset
10249 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10250 tree gapcst = build_int_cst (ref_type, gap_offset);
10251 offset = size_binop (PLUS_EXPR, offset, gapcst);
10253 data_ref
10254 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10255 if (alignment_support_scheme == dr_aligned)
10256 ;
10257 else
10258 TREE_TYPE (data_ref)
10259 = build_aligned_type (TREE_TYPE (data_ref),
10260 align * BITS_PER_UNIT);
10261 if (ltype != vectype)
10263 vect_copy_ref_info (data_ref,
10264 DR_REF (first_dr_info->dr));
10265 tree tem = make_ssa_name (ltype);
10266 new_stmt = gimple_build_assign (tem, data_ref);
10267 vect_finish_stmt_generation (vinfo, stmt_info,
10268 new_stmt, gsi);
10269 data_ref = NULL;
10270 vec<constructor_elt, va_gc> *v;
10271 vec_alloc (v, 2);
10272 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10274 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10275 build_zero_cst (ltype));
10276 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10278 else
10280 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10281 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10282 build_zero_cst (ltype));
10284 gcc_assert (new_vtype != NULL_TREE);
10285 if (new_vtype == vectype)
10286 new_stmt = gimple_build_assign (
10287 vec_dest, build_constructor (vectype, v));
10288 else
10290 tree new_vname = make_ssa_name (new_vtype);
10291 new_stmt = gimple_build_assign (
10292 new_vname, build_constructor (new_vtype, v));
10293 vect_finish_stmt_generation (vinfo, stmt_info,
10294 new_stmt, gsi);
10295 new_stmt = gimple_build_assign (
10296 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
10297 new_vname));
10301 break;
10303 case dr_explicit_realign:
10305 tree ptr, bump;
10307 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10309 if (compute_in_loop)
10310 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10311 &realignment_token,
10312 dr_explicit_realign,
10313 dataref_ptr, NULL);
10315 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10316 ptr = copy_ssa_name (dataref_ptr);
10317 else
10318 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10319 // For explicit realign the target alignment should be
10320 // known at compile time.
10321 unsigned HOST_WIDE_INT align =
10322 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10323 new_stmt = gimple_build_assign
10324 (ptr, BIT_AND_EXPR, dataref_ptr,
10325 build_int_cst
10326 (TREE_TYPE (dataref_ptr),
10327 -(HOST_WIDE_INT) align));
10328 vect_finish_stmt_generation (vinfo, stmt_info,
10329 new_stmt, gsi);
10330 data_ref
10331 = build2 (MEM_REF, vectype, ptr,
10332 build_int_cst (ref_type, 0));
10333 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10334 vec_dest = vect_create_destination_var (scalar_dest,
10335 vectype);
10336 new_stmt = gimple_build_assign (vec_dest, data_ref);
10337 new_temp = make_ssa_name (vec_dest, new_stmt);
10338 gimple_assign_set_lhs (new_stmt, new_temp);
10339 gimple_move_vops (new_stmt, stmt_info->stmt);
10340 vect_finish_stmt_generation (vinfo, stmt_info,
10341 new_stmt, gsi);
10342 msq = new_temp;
10344 bump = size_binop (MULT_EXPR, vs,
10345 TYPE_SIZE_UNIT (elem_type));
10346 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10347 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10348 stmt_info, bump);
10349 new_stmt = gimple_build_assign
10350 (NULL_TREE, BIT_AND_EXPR, ptr,
10351 build_int_cst
10352 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10353 if (TREE_CODE (ptr) == SSA_NAME)
10354 ptr = copy_ssa_name (ptr, new_stmt);
10355 else
10356 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10357 gimple_assign_set_lhs (new_stmt, ptr);
10358 vect_finish_stmt_generation (vinfo, stmt_info,
10359 new_stmt, gsi);
10360 data_ref
10361 = build2 (MEM_REF, vectype, ptr,
10362 build_int_cst (ref_type, 0));
10363 break;
10365 case dr_explicit_realign_optimized:
10367 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10368 new_temp = copy_ssa_name (dataref_ptr);
10369 else
10370 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10371 // We should only be doing this if we know the target
10372 // alignment at compile time.
10373 unsigned HOST_WIDE_INT align =
10374 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10375 new_stmt = gimple_build_assign
10376 (new_temp, BIT_AND_EXPR, dataref_ptr,
10377 build_int_cst (TREE_TYPE (dataref_ptr),
10378 -(HOST_WIDE_INT) align));
10379 vect_finish_stmt_generation (vinfo, stmt_info,
10380 new_stmt, gsi);
10381 data_ref
10382 = build2 (MEM_REF, vectype, new_temp,
10383 build_int_cst (ref_type, 0));
10384 break;
10386 default:
10387 gcc_unreachable ();
10389 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10390 /* DATA_REF is null if we've already built the statement. */
10391 if (data_ref)
10393 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10394 new_stmt = gimple_build_assign (vec_dest, data_ref);
10396 new_temp = make_ssa_name (vec_dest, new_stmt);
10397 gimple_set_lhs (new_stmt, new_temp);
10398 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10400 /* 3. Handle explicit realignment if necessary/supported.
10401 Create in loop:
10402 vec_dest = realign_load (msq, lsq, realignment_token) */
10403 if (alignment_support_scheme == dr_explicit_realign_optimized
10404 || alignment_support_scheme == dr_explicit_realign)
10406 lsq = gimple_assign_lhs (new_stmt);
10407 if (!realignment_token)
10408 realignment_token = dataref_ptr;
10409 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10410 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10411 msq, lsq, realignment_token);
10412 new_temp = make_ssa_name (vec_dest, new_stmt);
10413 gimple_assign_set_lhs (new_stmt, new_temp);
10414 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10416 if (alignment_support_scheme == dr_explicit_realign_optimized)
10418 gcc_assert (phi);
10419 if (i == vec_num - 1 && j == ncopies - 1)
10420 add_phi_arg (phi, lsq,
10421 loop_latch_edge (containing_loop),
10422 UNKNOWN_LOCATION);
10423 msq = lsq;
10427 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10429 tree perm_mask = perm_mask_for_reverse (vectype);
10430 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10431 perm_mask, stmt_info, gsi);
10432 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10435 /* Collect vector loads and later create their permutation in
10436 vect_transform_grouped_load (). */
10437 if (grouped_load || slp_perm)
10438 dr_chain.quick_push (new_temp);
10440 /* Store vector loads in the corresponding SLP_NODE. */
10441 if (slp && !slp_perm)
10442 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10444 /* With an SLP permutation we load the gaps as well; without one
10445 we need to skip the gaps after we manage to fully load
10446 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10447 group_elt += nunits;
10448 if (maybe_ne (group_gap_adj, 0U)
10449 && !slp_perm
10450 && known_eq (group_elt, group_size - group_gap_adj))
10452 poly_wide_int bump_val
10453 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10454 * group_gap_adj);
10455 if (tree_int_cst_sgn
10456 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10457 bump_val = -bump_val;
10458 tree bump = wide_int_to_tree (sizetype, bump_val);
10459 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10460 gsi, stmt_info, bump);
10461 group_elt = 0;
10464 /* Bump the vector pointer to account for a gap or for excess
10465 elements loaded for a permuted SLP load. */
10466 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10468 poly_wide_int bump_val
10469 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10470 * group_gap_adj);
10471 if (tree_int_cst_sgn
10472 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10473 bump_val = -bump_val;
10474 tree bump = wide_int_to_tree (sizetype, bump_val);
10475 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10476 stmt_info, bump);
10480 if (slp && !slp_perm)
10481 continue;
10483 if (slp_perm)
10485 unsigned n_perms;
10486 /* For SLP we know we've seen all possible uses of dr_chain so
10487 direct vect_transform_slp_perm_load to DCE the unused parts.
10488 ??? This is a hack to prevent compile-time issues as seen
10489 in PR101120 and friends. */
10490 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10491 gsi, vf, false, &n_perms,
10492 nullptr, true);
10493 gcc_assert (ok);
10495 else
10497 if (grouped_load)
10499 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10500 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10501 group_size, gsi);
10502 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10504 else
10506 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10509 dr_chain.release ();
10511 if (!slp)
10512 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10514 return true;
10517 /* Function vect_is_simple_cond.
10519 Input:
10520 LOOP - the loop that is being vectorized.
10521 COND - Condition that is checked for simple use.
10523 Output:
10524 *COMP_VECTYPE - the vector type for the comparison.
10525 *DTS - The def types for the arguments of the comparison
10527 Returns whether a COND can be vectorized. Checks whether
10528 condition operands are supportable using vect_is_simple_use. */
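/* For example (sketch), given

     x_4 = a_1 < b_2 ? c_3 : d_5;

   COND is the tree 'a_1 < b_2'; the def types of a_1 and b_2 are
   recorded in DTS and their vector type in *COMP_VECTYPE.  A mask
   condition, i.e. a boolean SSA name used directly as COND, is also
   accepted provided its vector type is a vector boolean type.  */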
10530 static bool
10531 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10532 slp_tree slp_node, tree *comp_vectype,
10533 enum vect_def_type *dts, tree vectype)
10535 tree lhs, rhs;
10536 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10537 slp_tree slp_op;
10539 /* Mask case. */
10540 if (TREE_CODE (cond) == SSA_NAME
10541 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10543 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10544 &slp_op, &dts[0], comp_vectype)
10545 || !*comp_vectype
10546 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10547 return false;
10548 return true;
10551 if (!COMPARISON_CLASS_P (cond))
10552 return false;
10554 lhs = TREE_OPERAND (cond, 0);
10555 rhs = TREE_OPERAND (cond, 1);
10557 if (TREE_CODE (lhs) == SSA_NAME)
10559 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10560 &lhs, &slp_op, &dts[0], &vectype1))
10561 return false;
10563 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10564 || TREE_CODE (lhs) == FIXED_CST)
10565 dts[0] = vect_constant_def;
10566 else
10567 return false;
10569 if (TREE_CODE (rhs) == SSA_NAME)
10571 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10572 &rhs, &slp_op, &dts[1], &vectype2))
10573 return false;
10575 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10576 || TREE_CODE (rhs) == FIXED_CST)
10577 dts[1] = vect_constant_def;
10578 else
10579 return false;
10581 if (vectype1 && vectype2
10582 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10583 TYPE_VECTOR_SUBPARTS (vectype2)))
10584 return false;
10586 *comp_vectype = vectype1 ? vectype1 : vectype2;
10587 /* Invariant comparison. */
10588 if (! *comp_vectype)
10590 tree scalar_type = TREE_TYPE (lhs);
10591 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10592 *comp_vectype = truth_type_for (vectype);
10593 else
10595 /* If we can widen the comparison to match vectype do so. */
10596 if (INTEGRAL_TYPE_P (scalar_type)
10597 && !slp_node
10598 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10599 TYPE_SIZE (TREE_TYPE (vectype))))
10600 scalar_type = build_nonstandard_integer_type
10601 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10602 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10603 slp_node);
10607 return true;
10610 /* vectorizable_condition.
10612 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10613 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10614 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10615 at GSI.
10617 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10619 Return true if STMT_INFO is vectorizable in this way. */
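   /* For illustration (hypothetical source, names made up): a scalar
      statement like

	r[i] = a[i] < b[i] ? c[i] : d[i];

      is vectorized here as a vector comparison feeding a VEC_COND_EXPR,
      roughly

	mask = va < vb;
	vr = VEC_COND_EXPR <mask, vc, vd>;

      where va..vd are the corresponding vector operands.  */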
10621 static bool
10622 vectorizable_condition (vec_info *vinfo,
10623 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10624 gimple **vec_stmt,
10625 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10627 tree scalar_dest = NULL_TREE;
10628 tree vec_dest = NULL_TREE;
10629 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10630 tree then_clause, else_clause;
10631 tree comp_vectype = NULL_TREE;
10632 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10633 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10634 tree vec_compare;
10635 tree new_temp;
10636 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10637 enum vect_def_type dts[4]
10638 = {vect_unknown_def_type, vect_unknown_def_type,
10639 vect_unknown_def_type, vect_unknown_def_type};
10640 int ndts = 4;
10641 int ncopies;
10642 int vec_num;
10643 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10644 int i;
10645 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10646 vec<tree> vec_oprnds0 = vNULL;
10647 vec<tree> vec_oprnds1 = vNULL;
10648 vec<tree> vec_oprnds2 = vNULL;
10649 vec<tree> vec_oprnds3 = vNULL;
10650 tree vec_cmp_type;
10651 bool masked = false;
10653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10654 return false;
10656 /* Is this a vectorizable conditional operation? */
10657 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10658 if (!stmt)
10659 return false;
10661 code = gimple_assign_rhs_code (stmt);
10662 if (code != COND_EXPR)
10663 return false;
10665 stmt_vec_info reduc_info = NULL;
10666 int reduc_index = -1;
10667 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10668 bool for_reduction
10669 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10670 if (for_reduction)
10672 if (slp_node)
10673 return false;
10674 reduc_info = info_for_reduction (vinfo, stmt_info);
10675 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10676 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10677 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10678 || reduc_index != -1);
10680 else
10682 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10683 return false;
10686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10687 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10689 if (slp_node)
10691 ncopies = 1;
10692 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10694 else
10696 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10697 vec_num = 1;
10700 gcc_assert (ncopies >= 1);
10701 if (for_reduction && ncopies > 1)
10702 return false; /* FORNOW */
10704 cond_expr = gimple_assign_rhs1 (stmt);
10706 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10707 &comp_vectype, &dts[0], vectype)
10708 || !comp_vectype)
10709 return false;
10711 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10712 slp_tree then_slp_node, else_slp_node;
10713 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10714 &then_clause, &then_slp_node, &dts[2], &vectype1))
10715 return false;
10716 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10717 &else_clause, &else_slp_node, &dts[3], &vectype2))
10718 return false;
10720 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10721 return false;
10723 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10724 return false;
10726 masked = !COMPARISON_CLASS_P (cond_expr);
10727 vec_cmp_type = truth_type_for (comp_vectype);
10729 if (vec_cmp_type == NULL_TREE)
10730 return false;
10732 cond_code = TREE_CODE (cond_expr);
10733 if (!masked)
10735 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10736 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10739 /* For conditional reductions, the "then" value needs to be the candidate
10740 value calculated by this iteration while the "else" value needs to be
10741 the result carried over from previous iterations. If the COND_EXPR
10742 is the other way around, we need to swap it. */
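	 /* For illustration (hypothetical loop, names made up): in

	      for (int i = 0; i < n; ++i)
		if (b[i] != 0)
		  last = a[i];

	    the COND_EXPR is "last = b[i] != 0 ? a[i] : last"; the candidate
	    a[i] is already the "then" value and the carried result is the
	    "else" value, so nothing needs to change.  If the source has the
	    clauses the other way around, the code below inverts the
	    comparison (or the mask) and swaps them.  */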
10743 bool must_invert_cmp_result = false;
10744 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10746 if (masked)
10747 must_invert_cmp_result = true;
10748 else
10750 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10751 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10752 if (new_code == ERROR_MARK)
10753 must_invert_cmp_result = true;
10754 else
10756 cond_code = new_code;
10757 /* Make sure we don't accidentally use the old condition. */
10758 cond_expr = NULL_TREE;
10761 std::swap (then_clause, else_clause);
10764 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10766 /* Boolean values may have another representation in vectors
10767 and therefore we prefer bit operations over comparison for
10768 them (which also works for scalar masks). We store opcodes
10769 to use in bitop1 and bitop2. Statement is vectorized as
10770 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10771 depending on bitop1 and bitop2 arity. */
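	 /* For illustration: on single-bit booleans the mappings in the
	    switch below are plain equivalences, e.g.

	      a >  b  <=>  a & ~b
	      a >= b  <=>  a | ~b
	      a != b  <=>  a ^ b
	      a == b  <=>  ~(a ^ b)

	    and LT_EXPR/LE_EXPR reuse the GT/GE patterns with the operands
	    swapped.  */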
10772 switch (cond_code)
10774 case GT_EXPR:
10775 bitop1 = BIT_NOT_EXPR;
10776 bitop2 = BIT_AND_EXPR;
10777 break;
10778 case GE_EXPR:
10779 bitop1 = BIT_NOT_EXPR;
10780 bitop2 = BIT_IOR_EXPR;
10781 break;
10782 case LT_EXPR:
10783 bitop1 = BIT_NOT_EXPR;
10784 bitop2 = BIT_AND_EXPR;
10785 std::swap (cond_expr0, cond_expr1);
10786 break;
10787 case LE_EXPR:
10788 bitop1 = BIT_NOT_EXPR;
10789 bitop2 = BIT_IOR_EXPR;
10790 std::swap (cond_expr0, cond_expr1);
10791 break;
10792 case NE_EXPR:
10793 bitop1 = BIT_XOR_EXPR;
10794 break;
10795 case EQ_EXPR:
10796 bitop1 = BIT_XOR_EXPR;
10797 bitop2 = BIT_NOT_EXPR;
10798 break;
10799 default:
10800 return false;
10802 cond_code = SSA_NAME;
10805 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10806 && reduction_type == EXTRACT_LAST_REDUCTION
10807 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10809 if (dump_enabled_p ())
10810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10811 "reduction comparison operation not supported.\n");
10812 return false;
10815 if (!vec_stmt)
10817 if (bitop1 != NOP_EXPR)
10819 machine_mode mode = TYPE_MODE (comp_vectype);
10820 optab optab;
10822 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10823 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10824 return false;
10826 if (bitop2 != NOP_EXPR)
10828 optab = optab_for_tree_code (bitop2, comp_vectype,
10829 optab_default);
10830 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10831 return false;
10835 vect_cost_for_stmt kind = vector_stmt;
10836 if (reduction_type == EXTRACT_LAST_REDUCTION)
10837 /* Count one reduction-like operation per vector. */
10838 kind = vec_to_scalar;
10839 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
10840 && (masked
10841 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
10842 cond_code)
10843 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
10844 ERROR_MARK))))
10845 return false;
10847 if (slp_node
10848 && (!vect_maybe_update_slp_op_vectype
10849 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10850 || (op_adjust == 1
10851 && !vect_maybe_update_slp_op_vectype
10852 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10853 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10854 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10856 if (dump_enabled_p ())
10857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10858 "incompatible vector types for invariants\n");
10859 return false;
10862 if (loop_vinfo && for_reduction
10863 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10865 if (reduction_type == EXTRACT_LAST_REDUCTION)
10866 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10867 ncopies * vec_num, vectype, NULL);
10868 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10869 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10871 if (dump_enabled_p ())
10872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10873 "conditional reduction prevents the use"
10874 " of partial vectors.\n");
10875 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10879 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10880 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10881 cost_vec, kind);
10882 return true;
10885 /* Transform. */
10887 /* Handle def. */
10888 scalar_dest = gimple_assign_lhs (stmt);
10889 if (reduction_type != EXTRACT_LAST_REDUCTION)
10890 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10892 bool swap_cond_operands = false;
10894 /* See whether another part of the vectorized code applies a loop
10895 mask to the condition, or to its inverse. */
10897 vec_loop_masks *masks = NULL;
10898 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10900 if (reduction_type == EXTRACT_LAST_REDUCTION)
10901 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10902 else
10904 scalar_cond_masked_key cond (cond_expr, ncopies);
10905 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10906 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10907 else
10909 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10910 tree_code orig_code = cond.code;
10911 cond.code = invert_tree_comparison (cond.code, honor_nans);
10912 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10914 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10915 cond_code = cond.code;
10916 swap_cond_operands = true;
10918 else
10920 /* Try the inverse of the current mask. We check if the
10921 inverse mask is live and if so we generate a negate of
10922 the current mask such that we still honor NaNs. */
10923 cond.inverted_p = true;
10924 cond.code = orig_code;
10925 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10927 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10928 cond_code = cond.code;
10929 swap_cond_operands = true;
10930 must_invert_cmp_result = true;
10937 /* Handle cond expr. */
10938 if (masked)
10939 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10940 cond_expr, &vec_oprnds0, comp_vectype,
10941 then_clause, &vec_oprnds2, vectype,
10942 reduction_type != EXTRACT_LAST_REDUCTION
10943 ? else_clause : NULL, &vec_oprnds3, vectype);
10944 else
10945 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10946 cond_expr0, &vec_oprnds0, comp_vectype,
10947 cond_expr1, &vec_oprnds1, comp_vectype,
10948 then_clause, &vec_oprnds2, vectype,
10949 reduction_type != EXTRACT_LAST_REDUCTION
10950 ? else_clause : NULL, &vec_oprnds3, vectype);
10952 /* Arguments are ready. Create the new vector stmt. */
10953 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10955 vec_then_clause = vec_oprnds2[i];
10956 if (reduction_type != EXTRACT_LAST_REDUCTION)
10957 vec_else_clause = vec_oprnds3[i];
10959 if (swap_cond_operands)
10960 std::swap (vec_then_clause, vec_else_clause);
10962 if (masked)
10963 vec_compare = vec_cond_lhs;
10964 else
10966 vec_cond_rhs = vec_oprnds1[i];
10967 if (bitop1 == NOP_EXPR)
10969 gimple_seq stmts = NULL;
10970 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10971 vec_cond_lhs, vec_cond_rhs);
10972 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10974 else
10976 new_temp = make_ssa_name (vec_cmp_type);
10977 gassign *new_stmt;
10978 if (bitop1 == BIT_NOT_EXPR)
10979 new_stmt = gimple_build_assign (new_temp, bitop1,
10980 vec_cond_rhs);
10981 else
10982 new_stmt
10983 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10984 vec_cond_rhs);
10985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10986 if (bitop2 == NOP_EXPR)
10987 vec_compare = new_temp;
10988 else if (bitop2 == BIT_NOT_EXPR
10989 && reduction_type != EXTRACT_LAST_REDUCTION)
10991 /* Instead of doing ~x ? y : z do x ? z : y. */
10992 vec_compare = new_temp;
10993 std::swap (vec_then_clause, vec_else_clause);
10995 else
10997 vec_compare = make_ssa_name (vec_cmp_type);
10998 if (bitop2 == BIT_NOT_EXPR)
10999 new_stmt
11000 = gimple_build_assign (vec_compare, bitop2, new_temp);
11001 else
11002 new_stmt
11003 = gimple_build_assign (vec_compare, bitop2,
11004 vec_cond_lhs, new_temp);
11005 vect_finish_stmt_generation (vinfo, stmt_info,
11006 new_stmt, gsi);
11011 /* If we decided to apply a loop mask to the result of the vector
11012 comparison, AND the comparison with the mask now. Later passes
11013 should then be able to reuse the AND results between multiple
11014 vector statements.
11016 For example:
11017 for (int i = 0; i < 100; ++i)
11018 x[i] = y[i] ? z[i] : 10;
11020 results in following optimized GIMPLE:
11022 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11023 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11024 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11025 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11026 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11027 vect_iftmp.11_47, { 10, ... }>;
11029 instead of using masked and unmasked forms of
11030 vec != { 0, ... } (masked in the MASK_LOAD,
11031 unmasked in the VEC_COND_EXPR). */
11033 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11034 in cases where that's necessary. */
11036 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
11038 if (!is_gimple_val (vec_compare))
11040 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11041 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11042 vec_compare);
11043 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11044 vec_compare = vec_compare_name;
11047 if (must_invert_cmp_result)
11049 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11050 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11051 BIT_NOT_EXPR,
11052 vec_compare);
11053 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11054 vec_compare = vec_compare_name;
11057 if (masks)
11059 tree loop_mask
11060 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
11061 vectype, i);
11062 tree tmp2 = make_ssa_name (vec_cmp_type);
11063 gassign *g
11064 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11065 loop_mask);
11066 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11067 vec_compare = tmp2;
11071 gimple *new_stmt;
11072 if (reduction_type == EXTRACT_LAST_REDUCTION)
11074 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11075 tree lhs = gimple_get_lhs (old_stmt);
11076 new_stmt = gimple_build_call_internal
11077 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11078 vec_then_clause);
11079 gimple_call_set_lhs (new_stmt, lhs);
11080 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11081 if (old_stmt == gsi_stmt (*gsi))
11082 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11083 else
11085 /* In this case we're moving the definition to later in the
11086 block. That doesn't matter because the only uses of the
11087 lhs are in phi statements. */
11088 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
11089 gsi_remove (&old_gsi, true);
11090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11093 else
11095 new_temp = make_ssa_name (vec_dest);
11096 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
11097 vec_then_clause, vec_else_clause);
11098 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11100 if (slp_node)
11101 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11102 else
11103 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11106 if (!slp_node)
11107 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11109 vec_oprnds0.release ();
11110 vec_oprnds1.release ();
11111 vec_oprnds2.release ();
11112 vec_oprnds3.release ();
11114 return true;
11117 /* vectorizable_comparison.
11119 Check if STMT_INFO is a comparison expression that can be vectorized.
11120 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11121 comparison, put it in VEC_STMT, and insert it at GSI.
11123 Return true if STMT_INFO is vectorizable in this way. */
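   /* For illustration (hypothetical source, names made up): a scalar
      statement like

	flag = x < y;

      with a boolean-typed FLAG becomes a single vector comparison

	vflag = vx < vy;

      whose result has a VECTOR_BOOLEAN_TYPE_P mask type.  */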
11125 static bool
11126 vectorizable_comparison (vec_info *vinfo,
11127 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11128 gimple **vec_stmt,
11129 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11131 tree lhs, rhs1, rhs2;
11132 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11133 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11134 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
11135 tree new_temp;
11136 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11137 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
11138 int ndts = 2;
11139 poly_uint64 nunits;
11140 int ncopies;
11141 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11142 int i;
11143 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11144 vec<tree> vec_oprnds0 = vNULL;
11145 vec<tree> vec_oprnds1 = vNULL;
11146 tree mask_type;
11147 tree mask;
11149 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11150 return false;
11152 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
11153 return false;
11155 mask_type = vectype;
11156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
11158 if (slp_node)
11159 ncopies = 1;
11160 else
11161 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11163 gcc_assert (ncopies >= 1);
11164 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11165 return false;
11167 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11168 if (!stmt)
11169 return false;
11171 code = gimple_assign_rhs_code (stmt);
11173 if (TREE_CODE_CLASS (code) != tcc_comparison)
11174 return false;
11176 slp_tree slp_rhs1, slp_rhs2;
11177 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11178 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11179 return false;
11181 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11182 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11183 return false;
11185 if (vectype1 && vectype2
11186 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11187 TYPE_VECTOR_SUBPARTS (vectype2)))
11188 return false;
11190 vectype = vectype1 ? vectype1 : vectype2;
11192 /* Invariant comparison. */
11193 if (!vectype)
11195 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11196 vectype = mask_type;
11197 else
11198 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11199 slp_node);
11200 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11201 return false;
11203 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11204 return false;
11206 /* Can't compare mask and non-mask types. */
11207 if (vectype1 && vectype2
11208 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
11209 return false;
11211 /* Boolean values may have another representation in vectors
11212 and therefore we prefer bit operations over comparison for
11213 them (which also works for scalar masks). We store opcodes
11214 to use in bitop1 and bitop2. Statement is vectorized as
11215 BITOP2 (rhs1 BITOP1 rhs2) or
11216 rhs1 BITOP2 (BITOP1 rhs2)
11217 depending on bitop1 and bitop2 arity. */
11218 bool swap_p = false;
11219 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11221 if (code == GT_EXPR)
11223 bitop1 = BIT_NOT_EXPR;
11224 bitop2 = BIT_AND_EXPR;
11226 else if (code == GE_EXPR)
11228 bitop1 = BIT_NOT_EXPR;
11229 bitop2 = BIT_IOR_EXPR;
11231 else if (code == LT_EXPR)
11233 bitop1 = BIT_NOT_EXPR;
11234 bitop2 = BIT_AND_EXPR;
11235 swap_p = true;
11237 else if (code == LE_EXPR)
11239 bitop1 = BIT_NOT_EXPR;
11240 bitop2 = BIT_IOR_EXPR;
11241 swap_p = true;
11243 else
11245 bitop1 = BIT_XOR_EXPR;
11246 if (code == EQ_EXPR)
11247 bitop2 = BIT_NOT_EXPR;
11251 if (!vec_stmt)
11253 if (bitop1 == NOP_EXPR)
11255 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
11256 return false;
11258 else
11260 machine_mode mode = TYPE_MODE (vectype);
11261 optab optab;
11263 optab = optab_for_tree_code (bitop1, vectype, optab_default);
11264 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11265 return false;
11267 if (bitop2 != NOP_EXPR)
11269 optab = optab_for_tree_code (bitop2, vectype, optab_default);
11270 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11271 return false;
11275 /* Put types on constant and invariant SLP children. */
11276 if (slp_node
11277 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
11278 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
11280 if (dump_enabled_p ())
11281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11282 "incompatible vector types for invariants\n");
11283 return false;
11286 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
11287 vect_model_simple_cost (vinfo, stmt_info,
11288 ncopies * (1 + (bitop2 != NOP_EXPR)),
11289 dts, ndts, slp_node, cost_vec);
11290 return true;
11293 /* Transform. */
11295 /* Handle def. */
11296 lhs = gimple_assign_lhs (stmt);
11297 mask = vect_create_destination_var (lhs, mask_type);
11299 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11300 rhs1, &vec_oprnds0, vectype,
11301 rhs2, &vec_oprnds1, vectype);
11302 if (swap_p)
11303 std::swap (vec_oprnds0, vec_oprnds1);
11305 /* Arguments are ready. Create the new vector stmt. */
11306 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
11308 gimple *new_stmt;
11309 vec_rhs2 = vec_oprnds1[i];
11311 new_temp = make_ssa_name (mask);
11312 if (bitop1 == NOP_EXPR)
11314 new_stmt = gimple_build_assign (new_temp, code,
11315 vec_rhs1, vec_rhs2);
11316 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11318 else
11320 if (bitop1 == BIT_NOT_EXPR)
11321 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11322 else
11323 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11324 vec_rhs2);
11325 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11326 if (bitop2 != NOP_EXPR)
11328 tree res = make_ssa_name (mask);
11329 if (bitop2 == BIT_NOT_EXPR)
11330 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11331 else
11332 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11333 new_temp);
11334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11337 if (slp_node)
11338 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11339 else
11340 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11343 if (!slp_node)
11344 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11346 vec_oprnds0.release ();
11347 vec_oprnds1.release ();
11349 return true;
11352 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11353 can handle all live statements in the node. Otherwise return true
11354 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11355 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11357 static bool
11358 can_vectorize_live_stmts (vec_info *vinfo,
11359 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11360 slp_tree slp_node, slp_instance slp_node_instance,
11361 bool vec_stmt_p,
11362 stmt_vector_for_cost *cost_vec)
11364 if (slp_node)
11366 stmt_vec_info slp_stmt_info;
11367 unsigned int i;
11368 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11370 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11371 && !vectorizable_live_operation (vinfo,
11372 slp_stmt_info, gsi, slp_node,
11373 slp_node_instance, i,
11374 vec_stmt_p, cost_vec))
11375 return false;
11378 else if (STMT_VINFO_LIVE_P (stmt_info)
11379 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11380 slp_node, slp_node_instance, -1,
11381 vec_stmt_p, cost_vec))
11382 return false;
11384 return true;
11387 /* Make sure the statement is vectorizable. */
11389 opt_result
11390 vect_analyze_stmt (vec_info *vinfo,
11391 stmt_vec_info stmt_info, bool *need_to_vectorize,
11392 slp_tree node, slp_instance node_instance,
11393 stmt_vector_for_cost *cost_vec)
11395 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11396 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11397 bool ok;
11398 gimple_seq pattern_def_seq;
11400 if (dump_enabled_p ())
11401 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11402 stmt_info->stmt);
11404 if (gimple_has_volatile_ops (stmt_info->stmt))
11405 return opt_result::failure_at (stmt_info->stmt,
11406 "not vectorized:"
11407 " stmt has volatile operands: %G\n",
11408 stmt_info->stmt);
11410 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11411 && node == NULL
11412 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11414 gimple_stmt_iterator si;
11416 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11418 stmt_vec_info pattern_def_stmt_info
11419 = vinfo->lookup_stmt (gsi_stmt (si));
11420 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11421 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11423 /* Analyze def stmt of STMT if it's a pattern stmt. */
11424 if (dump_enabled_p ())
11425 dump_printf_loc (MSG_NOTE, vect_location,
11426 "==> examining pattern def statement: %G",
11427 pattern_def_stmt_info->stmt);
11429 opt_result res
11430 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11431 need_to_vectorize, node, node_instance,
11432 cost_vec);
11433 if (!res)
11434 return res;
11439 /* Skip stmts that do not need to be vectorized. In loops this is expected
11440 to include:
11441 - the COND_EXPR which is the loop exit condition
11442 - any LABEL_EXPRs in the loop
11443 - computations that are used only for array indexing or loop control.
11444 In basic blocks we only analyze statements that are a part of some SLP
11445 instance, therefore, all the statements are relevant.
11447 A pattern statement needs to be analyzed instead of the original statement
11448 if the original statement is not relevant. Otherwise, we analyze both
11449 statements. In basic blocks we are called from some SLP instance
11450 traversal; don't analyze pattern stmts instead, since the pattern stmts
11451 will already be part of an SLP instance. */
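   /* For illustration (hypothetical GIMPLE, names made up): in a simple
      counted loop the statements

	i_7 = i_3 + 1;
	if (i_7 < n_5) ...

      serve only loop control, so they are neither relevant nor live and
      take the "irrelevant" early return below.  */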
11453 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11454 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11455 && !STMT_VINFO_LIVE_P (stmt_info))
11457 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11458 && pattern_stmt_info
11459 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11460 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11462 /* Analyze PATTERN_STMT instead of the original stmt. */
11463 stmt_info = pattern_stmt_info;
11464 if (dump_enabled_p ())
11465 dump_printf_loc (MSG_NOTE, vect_location,
11466 "==> examining pattern statement: %G",
11467 stmt_info->stmt);
11469 else
11471 if (dump_enabled_p ())
11472 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11474 return opt_result::success ();
11477 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11478 && node == NULL
11479 && pattern_stmt_info
11480 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11481 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11483 /* Analyze PATTERN_STMT too. */
11484 if (dump_enabled_p ())
11485 dump_printf_loc (MSG_NOTE, vect_location,
11486 "==> examining pattern statement: %G",
11487 pattern_stmt_info->stmt);
11489 opt_result res
11490 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11491 node_instance, cost_vec);
11492 if (!res)
11493 return res;
11496 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11498 case vect_internal_def:
11499 break;
11501 case vect_reduction_def:
11502 case vect_nested_cycle:
11503 gcc_assert (!bb_vinfo
11504 && (relevance == vect_used_in_outer
11505 || relevance == vect_used_in_outer_by_reduction
11506 || relevance == vect_used_by_reduction
11507 || relevance == vect_unused_in_scope
11508 || relevance == vect_used_only_live));
11509 break;
11511 case vect_induction_def:
11512 case vect_first_order_recurrence:
11513 gcc_assert (!bb_vinfo);
11514 break;
11516 case vect_constant_def:
11517 case vect_external_def:
11518 case vect_unknown_def_type:
11519 default:
11520 gcc_unreachable ();
11523 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11524 if (node)
11525 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11527 if (STMT_VINFO_RELEVANT_P (stmt_info))
11529 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11530 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11531 || (call && gimple_call_lhs (call) == NULL_TREE));
11532 *need_to_vectorize = true;
11535 if (PURE_SLP_STMT (stmt_info) && !node)
11537 if (dump_enabled_p ())
11538 dump_printf_loc (MSG_NOTE, vect_location,
11539 "handled only by SLP analysis\n");
11540 return opt_result::success ();
11543 ok = true;
11544 if (!bb_vinfo
11545 && (STMT_VINFO_RELEVANT_P (stmt_info)
11546 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11547 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11548 -mveclibabi= takes preference over library functions with
11549 the simd attribute. */
11550 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11551 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11552 cost_vec)
11553 || vectorizable_conversion (vinfo, stmt_info,
11554 NULL, NULL, node, cost_vec)
11555 || vectorizable_operation (vinfo, stmt_info,
11556 NULL, NULL, node, cost_vec)
11557 || vectorizable_assignment (vinfo, stmt_info,
11558 NULL, NULL, node, cost_vec)
11559 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11560 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11561 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11562 node, node_instance, cost_vec)
11563 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11564 NULL, node, cost_vec)
11565 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11566 || vectorizable_condition (vinfo, stmt_info,
11567 NULL, NULL, node, cost_vec)
11568 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11569 cost_vec)
11570 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11571 stmt_info, NULL, node)
11572 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11573 stmt_info, NULL, node, cost_vec));
11574 else
11576 if (bb_vinfo)
11577 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11578 || vectorizable_simd_clone_call (vinfo, stmt_info,
11579 NULL, NULL, node, cost_vec)
11580 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11581 cost_vec)
11582 || vectorizable_shift (vinfo, stmt_info,
11583 NULL, NULL, node, cost_vec)
11584 || vectorizable_operation (vinfo, stmt_info,
11585 NULL, NULL, node, cost_vec)
11586 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11587 cost_vec)
11588 || vectorizable_load (vinfo, stmt_info,
11589 NULL, NULL, node, cost_vec)
11590 || vectorizable_store (vinfo, stmt_info,
11591 NULL, NULL, node, cost_vec)
11592 || vectorizable_condition (vinfo, stmt_info,
11593 NULL, NULL, node, cost_vec)
11594 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11595 cost_vec)
11596 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11599 if (node)
11600 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11602 if (!ok)
11603 return opt_result::failure_at (stmt_info->stmt,
11604 "not vectorized:"
11605 " relevant stmt not supported: %G",
11606 stmt_info->stmt);
11608 /* Stmts that are (also) "live" (i.e., that are used out of the loop)
11609 need extra handling, except for vectorizable reductions. */
11610 if (!bb_vinfo
11611 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11612 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11613 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11614 stmt_info, NULL, node, node_instance,
11615 false, cost_vec))
11616 return opt_result::failure_at (stmt_info->stmt,
11617 "not vectorized:"
11618 " live stmt not supported: %G",
11619 stmt_info->stmt);
11621 return opt_result::success ();
11625 /* Function vect_transform_stmt.
11627 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11629 bool
11630 vect_transform_stmt (vec_info *vinfo,
11631 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11632 slp_tree slp_node, slp_instance slp_node_instance)
11634 bool is_store = false;
11635 gimple *vec_stmt = NULL;
11636 bool done;
11638 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11640 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11641 if (slp_node)
11642 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11644 switch (STMT_VINFO_TYPE (stmt_info))
11646 case type_demotion_vec_info_type:
11647 case type_promotion_vec_info_type:
11648 case type_conversion_vec_info_type:
11649 done = vectorizable_conversion (vinfo, stmt_info,
11650 gsi, &vec_stmt, slp_node, NULL);
11651 gcc_assert (done);
11652 break;
11654 case induc_vec_info_type:
11655 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11656 stmt_info, &vec_stmt, slp_node,
11657 NULL);
11658 gcc_assert (done);
11659 break;
11661 case shift_vec_info_type:
11662 done = vectorizable_shift (vinfo, stmt_info,
11663 gsi, &vec_stmt, slp_node, NULL);
11664 gcc_assert (done);
11665 break;
11667 case op_vec_info_type:
11668 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11669 NULL);
11670 gcc_assert (done);
11671 break;
11673 case assignment_vec_info_type:
11674 done = vectorizable_assignment (vinfo, stmt_info,
11675 gsi, &vec_stmt, slp_node, NULL);
11676 gcc_assert (done);
11677 break;
11679 case load_vec_info_type:
11680 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11681 NULL);
11682 gcc_assert (done);
11683 break;
11685 case store_vec_info_type:
11686 done = vectorizable_store (vinfo, stmt_info,
11687 gsi, &vec_stmt, slp_node, NULL);
11688 gcc_assert (done);
11689 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11691 /* In case of interleaving, the whole chain is vectorized when the
11692 last store in the chain is reached. Store stmts before the last
11693 one are skipped, and their vec_stmt_info shouldn't be freed
11694 meanwhile. */
11695 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11696 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11697 is_store = true;
11699 else
11700 is_store = true;
11701 break;
11703 case condition_vec_info_type:
11704 done = vectorizable_condition (vinfo, stmt_info,
11705 gsi, &vec_stmt, slp_node, NULL);
11706 gcc_assert (done);
11707 break;
11709 case comparison_vec_info_type:
11710 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11711 slp_node, NULL);
11712 gcc_assert (done);
11713 break;
11715 case call_vec_info_type:
11716 done = vectorizable_call (vinfo, stmt_info,
11717 gsi, &vec_stmt, slp_node, NULL);
11718 break;
11720 case call_simd_clone_vec_info_type:
11721 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11722 slp_node, NULL);
11723 break;
11725 case reduc_vec_info_type:
11726 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11727 gsi, &vec_stmt, slp_node);
11728 gcc_assert (done);
11729 break;
11731 case cycle_phi_info_type:
11732 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11733 &vec_stmt, slp_node, slp_node_instance);
11734 gcc_assert (done);
11735 break;
11737 case lc_phi_info_type:
11738 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11739 stmt_info, &vec_stmt, slp_node);
11740 gcc_assert (done);
11741 break;
11743 case recurr_info_type:
11744 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11745 stmt_info, &vec_stmt, slp_node, NULL);
11746 gcc_assert (done);
11747 break;
11749 case phi_info_type:
11750 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11751 gcc_assert (done);
11752 break;
11754 default:
11755 if (!STMT_VINFO_LIVE_P (stmt_info))
11757 if (dump_enabled_p ())
11758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11759 "stmt not supported.\n");
11760 gcc_unreachable ();
11762 done = true;
11765 if (!slp_node && vec_stmt)
11766 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11768 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11770 /* Handle stmts whose DEF is used outside the loop-nest that is
11771 being vectorized. */
11772 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11773 slp_node_instance, true, NULL);
11774 gcc_assert (done);
11777 if (slp_node)
11778 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11780 return is_store;
11784 /* Remove a group of stores (for SLP or interleaving), free their
11785 stmt_vec_info. */
11787 void
11788 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11790 stmt_vec_info next_stmt_info = first_stmt_info;
11792 while (next_stmt_info)
11794 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11795 next_stmt_info = vect_orig_stmt (next_stmt_info);
11796 /* Free the attached stmt_vec_info and remove the stmt. */
11797 vinfo->remove_stmt (next_stmt_info);
11798 next_stmt_info = tmp;
11802 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11803 elements of type SCALAR_TYPE, or null if the target doesn't support
11804 such a type.
11806 If NUNITS is zero, return a vector type that contains elements of
11807 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11809 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11810 for this vectorization region and want to "autodetect" the best choice.
11811 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11812 and we want the new type to be interoperable with it. PREVAILING_MODE
11813 in this case can be a scalar integer mode or a vector mode; when it
11814 is a vector mode, the function acts like a tree-level version of
11815 related_vector_mode. */
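   /* For illustration (hypothetical target with 128-bit vectors and 32-bit
      int): SCALAR_TYPE int with NUNITS 4 would yield a "vector(4) int"
      type, while NUNITS 0 together with a VOIDmode PREVAILING_MODE lets
      the target's preferred_simd_mode choose the vector size.  */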
11817 tree
11818 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11819 tree scalar_type, poly_uint64 nunits)
11821 tree orig_scalar_type = scalar_type;
11822 scalar_mode inner_mode;
11823 machine_mode simd_mode;
11824 tree vectype;
11826 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11827 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11828 return NULL_TREE;
11830 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11832 /* Interoperability between modes requires one to be a constant multiple
11833 of the other, so that the number of vectors required for each operation
11834 is a compile-time constant. */
11835 if (prevailing_mode != VOIDmode
11836 && !constant_multiple_p (nunits * nbytes,
11837 GET_MODE_SIZE (prevailing_mode))
11838 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11839 nunits * nbytes))
11840 return NULL_TREE;
11842 /* For vector types of elements whose mode precision doesn't
11843 match their type's precision we use an element type of mode
11844 precision. The vectorization routines will have to make sure
11845 they support the proper result truncation/extension.
11846 We also make sure to build vector types with INTEGER_TYPE
11847 component type only. */
11848 if (INTEGRAL_TYPE_P (scalar_type)
11849 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11850 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11851 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11852 TYPE_UNSIGNED (scalar_type));
11854 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11855 When the component mode passes the above test simply use a type
11856 corresponding to that mode. The theory is that any use that
11857 would cause problems with this will disable vectorization anyway. */
11858 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11859 && !INTEGRAL_TYPE_P (scalar_type))
11860 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11862 /* We can't build a vector type of elements with alignment bigger than
11863 their size. */
11864 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11865 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11866 TYPE_UNSIGNED (scalar_type));
11868 /* If we fell back to using the mode, fail if there was
11869 no scalar type for it. */
11870 if (scalar_type == NULL_TREE)
11871 return NULL_TREE;
11873 /* If no prevailing mode was supplied, use the mode the target prefers.
11874 Otherwise lookup a vector mode based on the prevailing mode. */
11875 if (prevailing_mode == VOIDmode)
11877 gcc_assert (known_eq (nunits, 0U));
11878 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11879 if (SCALAR_INT_MODE_P (simd_mode))
11881 /* Traditional behavior is not to take the integer mode
11882 literally, but simply to use it as a way of determining
11883 the vector size. It is up to mode_for_vector to decide
11884 what the TYPE_MODE should be.
11886 Note that nunits == 1 is allowed in order to support single
11887 element vector types. */
11888 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11889 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11890 return NULL_TREE;
11893 else if (SCALAR_INT_MODE_P (prevailing_mode)
11894 || !related_vector_mode (prevailing_mode,
11895 inner_mode, nunits).exists (&simd_mode))
11897 /* Fall back to using mode_for_vector, mostly in the hope of being
11898 able to use an integer mode. */
11899 if (known_eq (nunits, 0U)
11900 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11901 return NULL_TREE;
11903 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11904 return NULL_TREE;
11907 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11909 /* In cases where the mode was chosen by mode_for_vector, check that
11910 the target actually supports the chosen mode, or that it at least
11911 allows the vector mode to be replaced by a like-sized integer. */
11912 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11913 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11914 return NULL_TREE;
11916 /* Re-attach the address-space qualifier if we canonicalized the scalar
11917 type. */
11918 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11919 return build_qualified_type
11920 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11922 return vectype;
11925 /* Function get_vectype_for_scalar_type.
11927 Returns the vector type corresponding to SCALAR_TYPE as supported
11928 by the target. If GROUP_SIZE is nonzero and we're performing BB
11929 vectorization, make sure that the number of elements in the vector
11930 is no bigger than GROUP_SIZE. */
11932 tree
11933 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11934 unsigned int group_size)
11936 /* For BB vectorization, we should always have a group size once we've
11937 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11938 are tentative requests during things like early data reference
11939 analysis and pattern recognition. */
11940 if (is_a <bb_vec_info> (vinfo))
11941 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11942 else
11943 group_size = 0;
11945 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11946 scalar_type);
11947 if (vectype && vinfo->vector_mode == VOIDmode)
11948 vinfo->vector_mode = TYPE_MODE (vectype);
11950 /* Register the natural choice of vector type, before the group size
11951 has been applied. */
11952 if (vectype)
11953 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11955 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11956 try again with an explicit number of elements. */
11957 if (vectype
11958 && group_size
11959 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11961 /* Start with the biggest number of units that fits within
11962 GROUP_SIZE and halve it until we find a valid vector type.
11963 Usually either the first attempt will succeed or all will
11964 fail (in the latter case because GROUP_SIZE is too small
11965 for the target), but it's possible that a target could have
11966 a hole between supported vector types.
11968 If GROUP_SIZE is not a power of 2, this has the effect of
11969 trying the largest power of 2 that fits within the group,
11970 even though the group is not a multiple of that vector size.
11971 The BB vectorizer will then try to carve up the group into
11972 smaller pieces. */
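      /* For illustration: with GROUP_SIZE 6 the first attempt uses
	 1 << floor_log2 (6) == 4 units; if no 4-unit vector type exists
	 the loop retries with 2 before giving up.  */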
11973 unsigned int nunits = 1 << floor_log2 (group_size);
11976 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11977 scalar_type, nunits);
11978 nunits /= 2;
11980 while (nunits > 1 && !vectype);
11983 return vectype;
11986 /* Return the vector type corresponding to SCALAR_TYPE as supported
11987 by the target. NODE, if nonnull, is the SLP tree node that will
11988 use the returned vector type. */
11990 tree
11991 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11993 unsigned int group_size = 0;
11994 if (node)
11995 group_size = SLP_TREE_LANES (node);
11996 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11999 /* Function get_mask_type_for_scalar_type.
12001 Returns the mask type corresponding to a result of comparison
12002 of vectors of specified SCALAR_TYPE as supported by target.
12003 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12004 make sure that the number of elements in the vector is no bigger
12005 than GROUP_SIZE. */
12007 tree
12008 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
12009 unsigned int group_size)
12011 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12013 if (!vectype)
12014 return NULL;
12016 return truth_type_for (vectype);
12019 /* Function get_same_sized_vectype
12021 Returns a vector type corresponding to SCALAR_TYPE of size
12022 VECTOR_TYPE if supported by the target. */
12024 tree
12025 get_same_sized_vectype (tree scalar_type, tree vector_type)
12027 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12028 return truth_type_for (vector_type);
12030 poly_uint64 nunits;
12031 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12032 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12033 return NULL_TREE;
12035 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12036 scalar_type, nunits);
12039 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12040 would not change the chosen vector modes. */
12042 bool
12043 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12045 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12046 i != vinfo->used_vector_modes.end (); ++i)
12047 if (!VECTOR_MODE_P (*i)
12048 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12049 return false;
12050 return true;
12053 /* Function vect_is_simple_use.
12055 Input:
12056 VINFO - the vect info of the loop or basic block that is being vectorized.
12057 OPERAND - operand in the loop or bb.
12058 Output:
12059 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12060 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12061 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12062 the definition could be anywhere in the function
12063 DT - the type of definition
12065 Returns whether a stmt with OPERAND can be vectorized.
12066 For loops, supportable operands are constants, loop invariants, and operands
12067 that are defined by the current iteration of the loop. Unsupportable
12068 operands are those that are defined by a previous iteration of the loop (as
12069 is the case in reduction/induction computations).
12070 For basic blocks, supportable operands are constants and bb invariants.
12071 For now, operands defined outside the basic block are not supported. */
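   /* For illustration (hypothetical operands): a literal like 5 gives
      vect_constant_def, a default definition or a name with no stmt_info
      in the region gives vect_external_def, and an SSA_NAME defined by a
      statement inside the vectorized region gets that statement's
      recorded def type (vect_internal_def, vect_induction_def, ...).  */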
12073 bool
12074 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12075 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12077 if (def_stmt_info_out)
12078 *def_stmt_info_out = NULL;
12079 if (def_stmt_out)
12080 *def_stmt_out = NULL;
12081 *dt = vect_unknown_def_type;
12083 if (dump_enabled_p ())
12085 dump_printf_loc (MSG_NOTE, vect_location,
12086 "vect_is_simple_use: operand ");
12087 if (TREE_CODE (operand) == SSA_NAME
12088 && !SSA_NAME_IS_DEFAULT_DEF (operand))
12089 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
12090 else
12091 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
12094 if (CONSTANT_CLASS_P (operand))
12095 *dt = vect_constant_def;
12096 else if (is_gimple_min_invariant (operand))
12097 *dt = vect_external_def;
12098 else if (TREE_CODE (operand) != SSA_NAME)
12099 *dt = vect_unknown_def_type;
12100 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
12101 *dt = vect_external_def;
12102 else
12104 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
12105 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
12106 if (!stmt_vinfo)
12107 *dt = vect_external_def;
12108 else
12110 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
12111 def_stmt = stmt_vinfo->stmt;
12112 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
12113 if (def_stmt_info_out)
12114 *def_stmt_info_out = stmt_vinfo;
12116 if (def_stmt_out)
12117 *def_stmt_out = def_stmt;
12120 if (dump_enabled_p ())
12122 dump_printf (MSG_NOTE, ", type of def: ");
12123 switch (*dt)
12125 case vect_uninitialized_def:
12126 dump_printf (MSG_NOTE, "uninitialized\n");
12127 break;
12128 case vect_constant_def:
12129 dump_printf (MSG_NOTE, "constant\n");
12130 break;
12131 case vect_external_def:
12132 dump_printf (MSG_NOTE, "external\n");
12133 break;
12134 case vect_internal_def:
12135 dump_printf (MSG_NOTE, "internal\n");
12136 break;
12137 case vect_induction_def:
12138 dump_printf (MSG_NOTE, "induction\n");
12139 break;
12140 case vect_reduction_def:
12141 dump_printf (MSG_NOTE, "reduction\n");
12142 break;
12143 case vect_double_reduction_def:
12144 dump_printf (MSG_NOTE, "double reduction\n");
12145 break;
12146 case vect_nested_cycle:
12147 dump_printf (MSG_NOTE, "nested cycle\n");
12148 break;
12149 case vect_first_order_recurrence:
12150 dump_printf (MSG_NOTE, "first order recurrence\n");
12151 break;
12152 case vect_unknown_def_type:
12153 dump_printf (MSG_NOTE, "unknown\n");
12154 break;
12158 if (*dt == vect_unknown_def_type)
12160 if (dump_enabled_p ())
12161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12162 "Unsupported pattern.\n");
12163 return false;
12166 return true;
12169 /* Function vect_is_simple_use.
12171 Same as vect_is_simple_use but also determines the vector operand
12172 type of OPERAND and stores it to *VECTYPE. If the definition of
12173 OPERAND is vect_uninitialized_def, vect_constant_def or
12174 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12175 is responsible for computing the best suited vector type for the
12176 scalar operand. */
12178 bool
12179 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12180 tree *vectype, stmt_vec_info *def_stmt_info_out,
12181 gimple **def_stmt_out)
12183 stmt_vec_info def_stmt_info;
12184 gimple *def_stmt;
12185 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12186 return false;
12188 if (def_stmt_out)
12189 *def_stmt_out = def_stmt;
12190 if (def_stmt_info_out)
12191 *def_stmt_info_out = def_stmt_info;
12193 /* Now get a vector type if the def is internal, otherwise supply
12194 NULL_TREE and leave it up to the caller to figure out a proper
12195 type for the use stmt. */
12196 if (*dt == vect_internal_def
12197 || *dt == vect_induction_def
12198 || *dt == vect_reduction_def
12199 || *dt == vect_double_reduction_def
12200 || *dt == vect_nested_cycle
12201 || *dt == vect_first_order_recurrence)
12203 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12204 gcc_assert (*vectype != NULL_TREE);
12205 if (dump_enabled_p ())
12206 dump_printf_loc (MSG_NOTE, vect_location,
12207 "vect_is_simple_use: vectype %T\n", *vectype);
12209 else if (*dt == vect_uninitialized_def
12210 || *dt == vect_constant_def
12211 || *dt == vect_external_def)
12212 *vectype = NULL_TREE;
12213 else
12214 gcc_unreachable ();
12216 return true;
12219 /* Function vect_is_simple_use.
12221 Same as vect_is_simple_use but determines the operand by operand
12222 position OPERAND from either STMT or SLP_NODE, filling in *OP
12223 and *SLP_DEF (when SLP_NODE is not NULL). */
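   /* For illustration (hypothetical GIMPLE, names made up): for

	x_1 = a_2 < b_3 ? c_4 : d_5;

      OPERAND 0 and 1 select a_2 and b_3 from the embedded comparison,
      while OPERAND 2 and 3 select c_4 and d_5.  */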
12225 bool
12226 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
12227 unsigned operand, tree *op, slp_tree *slp_def,
12228 enum vect_def_type *dt,
12229 tree *vectype, stmt_vec_info *def_stmt_info_out)
12231 if (slp_node)
12233 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
12234 *slp_def = child;
12235 *vectype = SLP_TREE_VECTYPE (child);
12236 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
12238 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
12239 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
12241 else
12243 if (def_stmt_info_out)
12244 *def_stmt_info_out = NULL;
12245 *op = SLP_TREE_SCALAR_OPS (child)[0];
12246 *dt = SLP_TREE_DEF_TYPE (child);
12247 return true;
12250 else
12252 *slp_def = NULL;
12253 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
12255 if (gimple_assign_rhs_code (ass) == COND_EXPR
12256 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
12258 if (operand < 2)
12259 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
12260 else
12261 *op = gimple_op (ass, operand);
12263 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
12264 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
12265 else
12266 *op = gimple_op (ass, operand + 1);
12268 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
12269 *op = gimple_call_arg (call, operand);
12270 else
12271 gcc_unreachable ();
12272 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
12276 /* If OP is not NULL and is external or constant update its vector
12277 type with VECTYPE. Returns true if successful or false if not,
12278 for example when conflicting vector types are present. */
12280 bool
12281 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
12283 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
12284 return true;
12285 if (SLP_TREE_VECTYPE (op))
12286 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
12287 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
12288 should be handled by patterns. Allow vect_constant_def for now. */
12289 if (VECTOR_BOOLEAN_TYPE_P (vectype)
12290 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
12291 return false;
12292 SLP_TREE_VECTYPE (op) = vectype;
12293 return true;
12296 /* Function supportable_widening_operation
12298 Check whether an operation represented by the code CODE is a
12299 widening operation that is supported by the target platform in
12300 vector form (i.e., when operating on arguments of type VECTYPE_IN
12301 producing a result of type VECTYPE_OUT).
12303 Widening operations we currently support are NOP (CONVERT), FLOAT,
12304 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
12305 are supported by the target platform either directly (via vector
12306 tree-codes), or via target builtins.
12308 Output:
12309 - CODE1 and CODE2 are codes of vector operations to be used when
12310 vectorizing the operation, if available.
12311 - MULTI_STEP_CVT determines the number of required intermediate steps in
12312 case of multi-step conversion (like char->short->int - in that case
12313 MULTI_STEP_CVT will be 1).
12314 - INTERM_TYPES contains the intermediate type required to perform the
12315 widening operation (short in the above example). */
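/* For illustration only (hypothetical scalar code, assuming a target with
   128-bit vectors): the widening multiply

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   reads eight shorts per input vector but produces eight ints, i.e. two
   result vectors, so the vectorizer needs the VEC_WIDEN_MULT_LO_EXPR /
   VEC_WIDEN_MULT_HI_EXPR pair (or the EVEN/ODD pair when the results only
   feed a reduction).  */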
12317 bool
12318 supportable_widening_operation (vec_info *vinfo,
12319 enum tree_code code, stmt_vec_info stmt_info,
12320 tree vectype_out, tree vectype_in,
12321 enum tree_code *code1, enum tree_code *code2,
12322 int *multi_step_cvt,
12323 vec<tree> *interm_types)
12325 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
12326 class loop *vect_loop = NULL;
12327 machine_mode vec_mode;
12328 enum insn_code icode1, icode2;
12329 optab optab1, optab2;
12330 tree vectype = vectype_in;
12331 tree wide_vectype = vectype_out;
12332 enum tree_code c1, c2;
12333 int i;
12334 tree prev_type, intermediate_type;
12335 machine_mode intermediate_mode, prev_mode;
12336 optab optab3, optab4;
12338 *multi_step_cvt = 0;
12339 if (loop_info)
12340 vect_loop = LOOP_VINFO_LOOP (loop_info);
12342 switch (code)
12344 case WIDEN_MULT_EXPR:
12345 /* The result of a vectorized widening operation usually requires
12346 two vectors (because the widened results do not fit into one vector).
12347 The generated vector results would normally be expected to be
12348 generated in the same order as in the original scalar computation,
12349 i.e. if 8 results are generated in each vector iteration, they are
12350 to be organized as follows:
12351 vect1: [res1,res2,res3,res4],
12352 vect2: [res5,res6,res7,res8].
12354 However, in the special case that the result of the widening
12355 operation is used in a reduction computation only, the order doesn't
12356 matter (because when vectorizing a reduction we change the order of
12357 the computation). Some targets can take advantage of this and
12358 generate more efficient code. For example, targets like Altivec
12359 that support widen_mult using a sequence of {mult_even,mult_odd}
12360 generate the following vectors:
12361 vect1: [res1,res3,res5,res7],
12362 vect2: [res2,res4,res6,res8].
12364 When vectorizing outer-loops, we execute the inner-loop sequentially
12365 (each vectorized inner-loop iteration contributes to VF outer-loop
12366 iterations in parallel). We therefore don't allow changing the
12367 order of the computation in the inner-loop during outer-loop
12368 vectorization. */
12369 /* TODO: Another case in which order doesn't *really* matter is when we
12370 widen and then contract again, e.g. (short)((int)x * y >> 8).
12371 Normally, pack_trunc performs an even/odd permute, whereas the
12372 repack from an even/odd expansion would be an interleave, which
12373 would be significantly simpler for e.g. AVX2. */
12374 /* In any case, in order to avoid duplicating the code below, recurse
12375 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12376 are properly set up for the caller. If we fail, we'll continue with
12377 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12378 if (vect_loop
12379 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12380 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12381 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12382 stmt_info, vectype_out,
12383 vectype_in, code1, code2,
12384 multi_step_cvt, interm_types))
12386 /* Elements in a vector with the vect_used_by_reduction property cannot
12387 be reordered if the use chain with this property does not have the
12388 same operation. One such example is s += a * b, where elements
12389 in a and b cannot be reordered. Here we check if the vector defined
12390 by STMT is only directly used in the reduction statement. */
12391 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12392 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12393 if (use_stmt_info
12394 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12395 return true;
12397 c1 = VEC_WIDEN_MULT_LO_EXPR;
12398 c2 = VEC_WIDEN_MULT_HI_EXPR;
12399 break;
12401 case DOT_PROD_EXPR:
12402 c1 = DOT_PROD_EXPR;
12403 c2 = DOT_PROD_EXPR;
12404 break;
12406 case SAD_EXPR:
12407 c1 = SAD_EXPR;
12408 c2 = SAD_EXPR;
12409 break;
12411 case VEC_WIDEN_MULT_EVEN_EXPR:
12412 /* Support the recursion induced just above. */
12413 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12414 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12415 break;
12417 case WIDEN_LSHIFT_EXPR:
12418 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12419 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12420 break;
12422 case WIDEN_PLUS_EXPR:
12423 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12424 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12425 break;
12427 case WIDEN_MINUS_EXPR:
12428 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12429 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12430 break;
12432 CASE_CONVERT:
12433 c1 = VEC_UNPACK_LO_EXPR;
12434 c2 = VEC_UNPACK_HI_EXPR;
12435 break;
12437 case FLOAT_EXPR:
12438 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12439 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12440 break;
12442 case FIX_TRUNC_EXPR:
12443 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12444 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12445 break;
12447 default:
12448 gcc_unreachable ();
12451 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12452 std::swap (c1, c2);
12454 if (code == FIX_TRUNC_EXPR)
12456 /* The signedness is determined from the output operand. */
12457 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12458 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12460 else if (CONVERT_EXPR_CODE_P (code)
12461 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12462 && VECTOR_BOOLEAN_TYPE_P (vectype)
12463 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12464 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12466 /* If the input and result modes are the same, a different optab
12467 is needed where we pass in the number of units in vectype. */
12468 optab1 = vec_unpacks_sbool_lo_optab;
12469 optab2 = vec_unpacks_sbool_hi_optab;
12471 else
12473 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12474 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12477 if (!optab1 || !optab2)
12478 return false;
12480 vec_mode = TYPE_MODE (vectype);
12481 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12482 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12483 return false;
12485 *code1 = c1;
12486 *code2 = c2;
12488 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12489 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12491 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12492 return true;
12493 /* For scalar masks we may have different boolean
12494 vector types having the same QImode. Thus we
12495 add an additional check on the number of elements. */
12496 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12497 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12498 return true;
12501 /* Check if it's a multi-step conversion that can be done using intermediate
12502 types. */
12504 prev_type = vectype;
12505 prev_mode = vec_mode;
12507 if (!CONVERT_EXPR_CODE_P (code))
12508 return false;
12510 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12511 intermediate steps in the promotion sequence. We try
12512 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12513 not. */
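/* Illustrative walk-through, assuming 128-bit vectors: for a char -> int
   conversion the first unpack step produces V8HI from V16QI, which does
   not yet match the V4SI result, so the loop below records the V8HI
   intermediate type, bumps *MULTI_STEP_CVT to 1 and succeeds once the
   V8HI -> V4SI unpack is also supported.  */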
12514 interm_types->create (MAX_INTERM_CVT_STEPS);
12515 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12517 intermediate_mode = insn_data[icode1].operand[0].mode;
12518 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12519 intermediate_type
12520 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12521 else if (VECTOR_MODE_P (intermediate_mode))
12523 tree intermediate_element_type
12524 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
12525 TYPE_UNSIGNED (prev_type));
12526 intermediate_type
12527 = build_vector_type_for_mode (intermediate_element_type,
12528 intermediate_mode);
12530 else
12531 intermediate_type
12532 = lang_hooks.types.type_for_mode (intermediate_mode,
12533 TYPE_UNSIGNED (prev_type));
12535 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12536 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12537 && intermediate_mode == prev_mode
12538 && SCALAR_INT_MODE_P (prev_mode))
12540 /* If the input and result modes are the same, a different optab
12541 is needed where we pass in the number of units in vectype. */
12542 optab3 = vec_unpacks_sbool_lo_optab;
12543 optab4 = vec_unpacks_sbool_hi_optab;
12545 else
12547 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12548 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12551 if (!optab3 || !optab4
12552 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12553 || insn_data[icode1].operand[0].mode != intermediate_mode
12554 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12555 || insn_data[icode2].operand[0].mode != intermediate_mode
12556 || ((icode1 = optab_handler (optab3, intermediate_mode))
12557 == CODE_FOR_nothing)
12558 || ((icode2 = optab_handler (optab4, intermediate_mode))
12559 == CODE_FOR_nothing))
12560 break;
12562 interm_types->quick_push (intermediate_type);
12563 (*multi_step_cvt)++;
12565 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12566 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12568 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12569 return true;
12570 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12571 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12572 return true;
12575 prev_type = intermediate_type;
12576 prev_mode = intermediate_mode;
12579 interm_types->release ();
12580 return false;
12584 /* Function supportable_narrowing_operation
12586 Check whether an operation represented by the code CODE is a
12587 narrowing operation that is supported by the target platform in
12588 vector form (i.e., when operating on arguments of type VECTYPE_IN
12589 and producing a result of type VECTYPE_OUT).
12591 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12592 and FLOAT. This function checks if these operations are supported by
12593 the target platform directly via vector tree-codes.
12595 Output:
12596 - CODE1 is the code of a vector operation to be used when
12597 vectorizing the operation, if available.
12598 - MULTI_STEP_CVT determines the number of required intermediate steps in
12599 case of multi-step conversion (like int->short->char - in that case
12600 MULTI_STEP_CVT will be 1).
12601 - INTERM_TYPES contains the intermediate type required to perform the
12602 narrowing operation (short in the above example). */
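/* A minimal, hypothetical usage sketch; VECTYPE_OUT and VECTYPE_IN are
   assumed to come from the caller's analysis of a conversion statement:

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
       return false;

   For an int -> char conversion with 128-bit vectors this would succeed
   with CODE1 == VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and the short
   vector type in INTERM_TYPES.  */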
12604 bool
12605 supportable_narrowing_operation (enum tree_code code,
12606 tree vectype_out, tree vectype_in,
12607 enum tree_code *code1, int *multi_step_cvt,
12608 vec<tree> *interm_types)
12610 machine_mode vec_mode;
12611 enum insn_code icode1;
12612 optab optab1, interm_optab;
12613 tree vectype = vectype_in;
12614 tree narrow_vectype = vectype_out;
12615 enum tree_code c1;
12616 tree intermediate_type, prev_type;
12617 machine_mode intermediate_mode, prev_mode;
12618 int i;
12619 unsigned HOST_WIDE_INT n_elts;
12620 bool uns;
12622 *multi_step_cvt = 0;
12623 switch (code)
12625 CASE_CONVERT:
12626 c1 = VEC_PACK_TRUNC_EXPR;
12627 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12628 && VECTOR_BOOLEAN_TYPE_P (vectype)
12629 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12630 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12631 && n_elts < BITS_PER_UNIT)
12632 optab1 = vec_pack_sbool_trunc_optab;
12633 else
12634 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12635 break;
12637 case FIX_TRUNC_EXPR:
12638 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12639 /* The signedness is determined from the output operand. */
12640 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12641 break;
12643 case FLOAT_EXPR:
12644 c1 = VEC_PACK_FLOAT_EXPR;
12645 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12646 break;
12648 default:
12649 gcc_unreachable ();
12652 if (!optab1)
12653 return false;
12655 vec_mode = TYPE_MODE (vectype);
12656 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12657 return false;
12659 *code1 = c1;
12661 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12663 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12664 return true;
12665 /* For scalar masks we may have different boolean
12666 vector types having the same QImode. Thus we
12667 add an additional check on the number of elements. */
12668 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12669 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12670 return true;
12673 if (code == FLOAT_EXPR)
12674 return false;
12676 /* Check if it's a multi-step conversion that can be done using intermediate
12677 types. */
12678 prev_mode = vec_mode;
12679 prev_type = vectype;
12680 if (code == FIX_TRUNC_EXPR)
12681 uns = TYPE_UNSIGNED (vectype_out);
12682 else
12683 uns = TYPE_UNSIGNED (vectype);
12685 /* For multi-step FIX_TRUNC_EXPR, prefer a signed float-to-integer
12686 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
12687 more costly than signed. */
12688 if (code == FIX_TRUNC_EXPR && uns)
12690 enum insn_code icode2;
12692 intermediate_type
12693 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12694 interm_optab
12695 = optab_for_tree_code (c1, intermediate_type, optab_default);
12696 if (interm_optab != unknown_optab
12697 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12698 && insn_data[icode1].operand[0].mode
12699 == insn_data[icode2].operand[0].mode)
12701 uns = false;
12702 optab1 = interm_optab;
12703 icode1 = icode2;
12707 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12708 intermediate steps in the narrowing sequence. We try
12709 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
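/* Illustrative walk-through, assuming 128-bit vectors: packing V4SI down
   to V16QI first yields V8HI, which does not yet match the narrow result,
   so the loop below records the V8HI intermediate type, bumps
   *MULTI_STEP_CVT to 1 and succeeds once the V8HI -> V16QI pack is also
   supported.  */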
12710 interm_types->create (MAX_INTERM_CVT_STEPS);
12711 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12713 intermediate_mode = insn_data[icode1].operand[0].mode;
12714 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12715 intermediate_type
12716 = vect_double_mask_nunits (prev_type, intermediate_mode);
12717 else
12718 intermediate_type
12719 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12720 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12721 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12722 && SCALAR_INT_MODE_P (prev_mode)
12723 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12724 && n_elts < BITS_PER_UNIT)
12725 interm_optab = vec_pack_sbool_trunc_optab;
12726 else
12727 interm_optab
12728 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12729 optab_default);
12730 if (!interm_optab
12731 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12732 || insn_data[icode1].operand[0].mode != intermediate_mode
12733 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12734 == CODE_FOR_nothing))
12735 break;
12737 interm_types->quick_push (intermediate_type);
12738 (*multi_step_cvt)++;
12740 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12742 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12743 return true;
12744 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12745 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12746 return true;
12749 prev_mode = intermediate_mode;
12750 prev_type = intermediate_type;
12751 optab1 = interm_optab;
12754 interm_types->release ();
12755 return false;
12758 /* Generate and return a vector mask of MASK_TYPE such that
12759 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12760 Add the statements to SEQ. */
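/* A minimal usage sketch (hypothetical; MASK_TYPE, IV and NITERS are
   assumed to be provided by the caller):

     gimple_seq seq = NULL;
     tree mask = vect_gen_while (&seq, mask_type, iv, niters, "loop_mask");

   This emits an IFN_WHILE_ULT call into SEQ whose SSA result has the
   leading lanes I with IV + I < NITERS set and all later lanes clear.  */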
12762 tree
12763 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12764 tree end_index, const char *name)
12766 tree cmp_type = TREE_TYPE (start_index);
12767 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12768 cmp_type, mask_type,
12769 OPTIMIZE_FOR_SPEED));
12770 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12771 start_index, end_index,
12772 build_zero_cst (mask_type));
12773 tree tmp;
12774 if (name)
12775 tmp = make_temp_ssa_name (mask_type, NULL, name);
12776 else
12777 tmp = make_ssa_name (mask_type);
12778 gimple_call_set_lhs (call, tmp);
12779 gimple_seq_add_stmt (seq, call);
12780 return tmp;
12783 /* Generate a vector mask of type MASK_TYPE for which element I is false iff
12784 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12786 tree
12787 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12788 tree end_index)
12790 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12791 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12794 /* Try to compute the vector types required to vectorize STMT_INFO,
12795 returning true on success and false if vectorization isn't possible.
12796 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12797 make sure that the number of elements in the vectors is no bigger
12798 than GROUP_SIZE.
12800 On success:
12802 - Set *STMT_VECTYPE_OUT to:
12803 - NULL_TREE if the statement doesn't need to be vectorized;
12804 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12806 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12807 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12808 statement does not help to determine the overall number of units. */
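/* For illustration only, assuming 128-bit vectors: for a conversion
   statement such as

     int_dest = (int) short_src;

   *STMT_VECTYPE_OUT would be the 4 x int vector type, while
   *NUNITS_VECTYPE_OUT would be the 8 x short vector type derived from the
   smallest scalar type, since eight units are what ultimately constrain
   the vectorization factor.  */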
12810 opt_result
12811 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12812 tree *stmt_vectype_out,
12813 tree *nunits_vectype_out,
12814 unsigned int group_size)
12816 gimple *stmt = stmt_info->stmt;
12818 /* For BB vectorization, we should always have a group size once we've
12819 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12820 are tentative requests during things like early data reference
12821 analysis and pattern recognition. */
12822 if (is_a <bb_vec_info> (vinfo))
12823 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12824 else
12825 group_size = 0;
12827 *stmt_vectype_out = NULL_TREE;
12828 *nunits_vectype_out = NULL_TREE;
12830 if (gimple_get_lhs (stmt) == NULL_TREE
12831 /* MASK_STORE has no lhs, but is ok. */
12832 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12834 if (is_a <gcall *> (stmt))
12836 /* Ignore calls with no lhs. These must be calls to
12837 #pragma omp simd functions, and the vectorization factor
12838 they really need can't be determined until
12839 vectorizable_simd_clone_call. */
12840 if (dump_enabled_p ())
12841 dump_printf_loc (MSG_NOTE, vect_location,
12842 "defer to SIMD clone analysis.\n");
12843 return opt_result::success ();
12846 return opt_result::failure_at (stmt,
12847 "not vectorized: irregular stmt.%G", stmt);
12850 tree vectype;
12851 tree scalar_type = NULL_TREE;
12852 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12854 vectype = STMT_VINFO_VECTYPE (stmt_info);
12855 if (dump_enabled_p ())
12856 dump_printf_loc (MSG_NOTE, vect_location,
12857 "precomputed vectype: %T\n", vectype);
12859 else if (vect_use_mask_type_p (stmt_info))
12861 unsigned int precision = stmt_info->mask_precision;
12862 scalar_type = build_nonstandard_integer_type (precision, 1);
12863 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12864 if (!vectype)
12865 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12866 " data-type %T\n", scalar_type);
12867 if (dump_enabled_p ())
12868 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12870 else
12872 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12873 scalar_type = TREE_TYPE (DR_REF (dr));
12874 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12875 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12876 else
12877 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12879 if (dump_enabled_p ())
12881 if (group_size)
12882 dump_printf_loc (MSG_NOTE, vect_location,
12883 "get vectype for scalar type (group size %d):"
12884 " %T\n", group_size, scalar_type);
12885 else
12886 dump_printf_loc (MSG_NOTE, vect_location,
12887 "get vectype for scalar type: %T\n", scalar_type);
12889 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12890 if (!vectype)
12891 return opt_result::failure_at (stmt,
12892 "not vectorized:"
12893 " unsupported data-type %T\n",
12894 scalar_type);
12896 if (dump_enabled_p ())
12897 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12900 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12901 return opt_result::failure_at (stmt,
12902 "not vectorized: vector stmt in loop:%G",
12903 stmt);
12905 *stmt_vectype_out = vectype;
12907 /* Don't try to compute scalar types if the stmt produces a boolean
12908 vector; use the existing vector type instead. */
12909 tree nunits_vectype = vectype;
12910 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12912 /* The number of units is set according to the smallest scalar
12913 type (or the largest vector size, but we only support one
12914 vector size per vectorization). */
12915 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12916 TREE_TYPE (vectype));
12917 if (scalar_type != TREE_TYPE (vectype))
12919 if (dump_enabled_p ())
12920 dump_printf_loc (MSG_NOTE, vect_location,
12921 "get vectype for smallest scalar type: %T\n",
12922 scalar_type);
12923 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12924 group_size);
12925 if (!nunits_vectype)
12926 return opt_result::failure_at
12927 (stmt, "not vectorized: unsupported data-type %T\n",
12928 scalar_type);
12929 if (dump_enabled_p ())
12930 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12931 nunits_vectype);
12935 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12936 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12937 return opt_result::failure_at (stmt,
12938 "Not vectorized: Incompatible number "
12939 "of vector subparts between %T and %T\n",
12940 nunits_vectype, *stmt_vectype_out);
12942 if (dump_enabled_p ())
12944 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12945 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12946 dump_printf (MSG_NOTE, "\n");
12949 *nunits_vectype_out = nunits_vectype;
12950 return opt_result::success ();
12953 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12955 min_of_start_and_end = min (START_INDEX, END_INDEX);
12956 left_len = END_INDEX - min_of_start_and_end;
12957 rhs = min (left_len, LEN_LIMIT);
12958 LEN = rhs;
12960 Note: the cost of the code generated by this function is modeled
12961 by vect_estimate_min_profitable_iters, so changes here may need
12962 corresponding changes there. */
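/* A worked example (illustration only): with START_INDEX = 60,
   END_INDEX = 64 and LEN_LIMIT = 16 the generated sequence computes
   min (60, 64) = 60, then 64 - 60 = 4, then min (4, 16) = 4, so LEN
   becomes 4 for the final, partial iteration; whenever at least
   LEN_LIMIT units remain, LEN is simply LEN_LIMIT.  */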
12964 gimple_seq
12965 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12967 gimple_seq stmts = NULL;
12968 tree len_type = TREE_TYPE (len);
12969 gcc_assert (TREE_TYPE (start_index) == len_type);
12971 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12972 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12973 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12974 gimple* stmt = gimple_build_assign (len, rhs);
12975 gimple_seq_add_stmt (&stmts, stmt);
12977 return stmts;