/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  stmt_vec_info stmt_info, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
              || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
                           NULL_TREE, 0, where);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vec_stmt_relevant_p: forcing live pattern stmt "
                             "relevant.\n");
          relevant = vect_used_only_live;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "mark relevant %d, live %d: %G", relevant, live_p,
                         stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/*
   Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */
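/* As an illustration, assuming a non-SLP statement with NCOPIES == 2 and
   one constant or external operand: the code below records a single
   scalar_to_vec prologue cost for broadcasting that operand and an
   inside-of-loop cost of two copies of KIND (vector_stmt by default).  */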
static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */
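/* A worked example, assuming NCOPIES == 2 and PWR == 1 (a two-step
   promotion/demotion): the first loop iteration below records 2 statements
   and the second records 4, i.e. 6 in total, plus one vector_stmt prologue
   cost for each constant or external operand.  */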
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       gather_scatter_info *gs_info,
                       dr_alignment_support alignment_support_scheme,
                       int misalignment,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
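  /* As an illustration of the permute accounting below, assuming a
     VMAT_CONTIGUOUS_PERMUTE store with GROUP_SIZE == 4 and NCOPIES == 1:
     ceil_log2 (4) * 4 == 8 vec_perm statements are recorded for the
     interleaving, in addition to the per-vector store costs added
     further down.  */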
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl)
        /* For emulated scatter N offset vector element extracts
           (we assume the scalar scaling and ptr + offset add is consumed by
           the load).  */
        inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
                                         vec_to_scalar, stmt_info, 0,
                                         vect_body);
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
                         misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl))
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                     dr_alignment_support alignment_support_scheme,
                     int misalignment,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          misalignment, vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                    dr_alignment_support alignment_support_scheme,
                    int misalignment,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */
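/* For example, assuming a constant scalar VAL and a four-element vector
   TYPE: the code below builds { VAL, VAL, VAL, VAL } with
   build_vector_from_val and emits "cst_N = { VAL, ... };" either before
   GSI or at the loop preheader.  */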
tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */
void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                               unsigned ncopies,
                               tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
        vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
        vec_oprnds->quick_push (gimple_get_lhs
                                  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
                   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
                   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
                   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op0, vec_oprnds0, vectype0);
      if (op1)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op1, vec_oprnds1, vectype1);
      if (op2)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op2, vec_oprnds2, vectype2);
      if (op3)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0,
                   tree op1, vec<tree> *vec_oprnds1,
                   tree op2, vec<tree> *vec_oprnds2,
                   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     op0, vec_oprnds0, NULL_TREE,
                     op1, vec_oprnds1, NULL_TREE,
                     op2, vec_oprnds2, NULL_TREE,
                     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
                               stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
         e.g. be in a must-not-throw region.  Ensure newly created stmts
         that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
        add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
                          stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
                             stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          gimple_set_modified (vec_stmt, true);
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && (!(gimple_call_flags (vec_stmt)
                            & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
                          || (gimple_call_lhs (vec_stmt)
                              && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}
static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */
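/* In outline: load/store-lanes and gather/scatter accesses record their
   loop mask or loop length directly below; contiguous accesses at the end
   of the function prefer a length-based control (vect_record_loop_len)
   when get_len_load_store_mode finds a suitable mode and otherwise fall
   back to a loop mask (vect_record_loop_mask); if neither form is
   available, partial vectors are disabled for the whole loop.  */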
static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                      slp_tree slp_node,
                                      vec_load_store_type vls_type,
                                      int group_size,
                                      vect_memory_access_type
                                      memory_access_type,
                                      gather_scatter_info *gs_info,
                                      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
          ? !vect_load_lanes_supported (vectype, group_size, true)
          : !vect_store_lanes_supported (vectype, group_size, true))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                             scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      internal_fn len_ifn = (is_load
                             ? IFN_MASK_LEN_GATHER_LOAD
                             : IFN_MASK_LEN_SCATTER_STORE);
      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
                                                  gs_info->memory_type,
                                                  gs_info->offset_vectype,
                                                  gs_info->scale))
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                       gs_info->memory_type,
                                                       gs_info->offset_vectype,
                                                       gs_info->scale))
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because an"
                         " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors when emulating"
                         " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };
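  /* For instance, assuming GROUP_SIZE * VF == 12 scalars and 8-element
     vectors: group_memory_nvectors rounds the division up and the code
     below records 2 vectors' worth of loop masks or lengths.  */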
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  machine_mode vmode;
  bool using_partial_vectors_p = false;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }
  else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
           && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because the"
                         " target doesn't have the appropriate partial"
                         " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */
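/* In the general case this emits a single statement of the form
   vec_mask_and_N = VEC_MASK & LOOP_MASK before GSI; if the pair is
   already known to be ANDed (tracked in vec_cond_masked_set), VEC_MASK
   is returned unchanged and no statement is emitted.  */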
static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
                  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
1756 /* Determine whether we can use a gather load or scatter store to vectorize
1757 strided load or store STMT_INFO by truncating the current offset to a
1758 smaller width. We need to be able to construct an offset vector:
1760 { 0, X, X*2, X*3, ... }
1762 without loss of precision, where X is STMT_INFO's DR_STEP.
1764 Return true if this is possible, describing the gather load or scatter
1765 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
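/* For example, with DR_STEP == 4, an element size of 4 bytes and a known
   bound of 255 latch iterations, a scale of 4 needs only 8-bit offsets
   { 0, 1, 2, ... }, whereas a scale of 1 would need 16-bit offsets
   { 0, 4, 8, ... }.  */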
1767 static bool
1768 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1769 loop_vec_info loop_vinfo, bool masked_p,
1770 gather_scatter_info *gs_info)
1772 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1773 data_reference *dr = dr_info->dr;
1774 tree step = DR_STEP (dr);
1775 if (TREE_CODE (step) != INTEGER_CST)
1777 /* ??? Perhaps we could use range information here? */
1778 if (dump_enabled_p ())
1779 dump_printf_loc (MSG_NOTE, vect_location,
1780 "cannot truncate variable step.\n");
1781 return false;
1784 /* Get the number of bits in an element. */
1785 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1786 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1787 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1789 /* Set COUNT to the upper limit on the number of elements - 1.
1790 Start with the maximum vectorization factor. */
1791 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1793 /* Try lowering COUNT to the number of scalar latch iterations. */
1794 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1795 widest_int max_iters;
1796 if (max_loop_iterations (loop, &max_iters)
1797 && max_iters < count)
1798 count = max_iters.to_shwi ();
1800 /* Try scales of 1 and the element size. */
1801 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1802 wi::overflow_type overflow = wi::OVF_NONE;
1803 for (int i = 0; i < 2; ++i)
1805 int scale = scales[i];
1806 widest_int factor;
1807 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1808 continue;
1810 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1811 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1812 if (overflow)
1813 continue;
1814 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1815 unsigned int min_offset_bits = wi::min_precision (range, sign);
1817 /* Find the narrowest viable offset type. */
1818 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1819 tree offset_type = build_nonstandard_integer_type (offset_bits,
1820 sign == UNSIGNED);
1822 /* See whether the target supports the operation with an offset
1823 no narrower than OFFSET_TYPE. */
1824 tree memory_type = TREE_TYPE (DR_REF (dr));
1825 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1826 vectype, memory_type, offset_type, scale,
1827 &gs_info->ifn, &gs_info->offset_vectype)
1828 || gs_info->ifn == IFN_LAST)
1829 continue;
1831 gs_info->decl = NULL_TREE;
1832 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1833 but we don't need to store that here. */
1834 gs_info->base = NULL_TREE;
1835 gs_info->element_type = TREE_TYPE (vectype);
1836 gs_info->offset = fold_convert (offset_type, step);
1837 gs_info->offset_dt = vect_constant_def;
1838 gs_info->scale = scale;
1839 gs_info->memory_type = memory_type;
1840 return true;
1843 if (overflow && dump_enabled_p ())
1844 dump_printf_loc (MSG_NOTE, vect_location,
1845 "truncating gather/scatter offset to %d bits"
1846 " might change its value.\n", element_bits);
1848 return false;
1851 /* Return true if we can use gather/scatter internal functions to
1852 vectorize STMT_INFO, which is a grouped or strided load or store.
1853 MASKED_P is true if the load or store is conditional. When returning
1854 true, fill in GS_INFO with the information required to perform the
1855 operation. */
1857 static bool
1858 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1859 loop_vec_info loop_vinfo, bool masked_p,
1860 gather_scatter_info *gs_info)
1862 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1863 || gs_info->ifn == IFN_LAST)
1864 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1865 masked_p, gs_info);
1867 tree old_offset_type = TREE_TYPE (gs_info->offset);
1868 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1870 gcc_assert (TYPE_PRECISION (new_offset_type)
1871 >= TYPE_PRECISION (old_offset_type));
1872 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1874 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_NOTE, vect_location,
1876 "using gather/scatter for strided/grouped access,"
1877 " scale = %d\n", gs_info->scale);
1879 return true;
1882 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1883 elements with a known constant step. Return -1 if that step
1884 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1886 static int
1887 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1889 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1890 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1891 size_zero_node);
1894 /* If the target supports a permute mask that reverses the elements in
1895 a vector of type VECTYPE, return that mask, otherwise return null. */
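/* For example, for a 4-element vector the reversing permutation is
   { 3, 2, 1, 0 }.  */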
1897 static tree
1898 perm_mask_for_reverse (tree vectype)
1900 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1902 /* The encoding has a single stepped pattern. */
1903 vec_perm_builder sel (nunits, 1, 3);
1904 for (int i = 0; i < 3; ++i)
1905 sel.quick_push (nunits - 1 - i);
1907 vec_perm_indices indices (sel, 1, nunits);
1908 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1909 indices))
1910 return NULL_TREE;
1911 return vect_gen_perm_mask_checked (vectype, indices);
1914 /* A subroutine of get_load_store_type, with a subset of the same
1915 arguments. Handle the case where STMT_INFO is a load or store that
1916 accesses consecutive elements with a negative step. Sets *POFFSET
1917 to the offset to be applied to the DR for the first access. */
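/* For example, for a V4SI access with a negative step *POFFSET is set to
   -3 * 4 == -12 bytes, since the first vector access starts three elements
   before the address recorded in the DR.  */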
1919 static vect_memory_access_type
1920 get_negative_load_store_type (vec_info *vinfo,
1921 stmt_vec_info stmt_info, tree vectype,
1922 vec_load_store_type vls_type,
1923 unsigned int ncopies, poly_int64 *poffset)
1925 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1926 dr_alignment_support alignment_support_scheme;
1928 if (ncopies > 1)
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "multiple types with negative step.\n");
1933 return VMAT_ELEMENTWISE;
1936 /* For backward-running DRs the first access in VECTYPE is actually
1937 N-1 elements before the address of the DR. */
1938 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1939 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1941 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1942 alignment_support_scheme
1943 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1944 if (alignment_support_scheme != dr_aligned
1945 && alignment_support_scheme != dr_unaligned_supported)
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1949 "negative step but alignment required.\n");
1950 *poffset = 0;
1951 return VMAT_ELEMENTWISE;
1954 if (vls_type == VLS_STORE_INVARIANT)
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "negative step with invariant source;"
1959 " no permute needed.\n");
1960 return VMAT_CONTIGUOUS_DOWN;
1963 if (!perm_mask_for_reverse (vectype))
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "negative step and reversing not supported.\n");
1968 *poffset = 0;
1969 return VMAT_ELEMENTWISE;
1972 return VMAT_CONTIGUOUS_REVERSE;
1975 /* STMT_INFO is either a masked or unconditional store. Return the value
1976 being stored. */
1978 tree
1979 vect_get_store_rhs (stmt_vec_info stmt_info)
1981 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1983 gcc_assert (gimple_assign_single_p (assign));
1984 return gimple_assign_rhs1 (assign);
1986 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1988 internal_fn ifn = gimple_call_internal_fn (call);
1989 int index = internal_fn_stored_value_index (ifn);
1990 gcc_assert (index >= 0);
1991 return gimple_call_arg (call, index);
1993 gcc_unreachable ();
1996 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1998 This function returns a vector type which can be composed from NELTS pieces,
1999 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2000 same vector size as the returned vector. It first checks whether the target
2001 supports a vector mode of the piece size for the construction; if not, it
2002 checks whether a scalar mode of the piece size can be used instead. It
2003 returns NULL_TREE if no suitable composition is found.
2005 For example, for (vtype=V16QI, nelts=4), we can probably get:
2006 - V16QI with PTYPE V4QI.
2007 - V4SI with PTYPE SI.
2008 - NULL_TREE. */
2010 static tree
2011 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2013 gcc_assert (VECTOR_TYPE_P (vtype));
2014 gcc_assert (known_gt (nelts, 0U));
2016 machine_mode vmode = TYPE_MODE (vtype);
2017 if (!VECTOR_MODE_P (vmode))
2018 return NULL_TREE;
2020 /* When we are asked to compose the vector from its components, let
2021 that happen directly. */
2022 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
2024 *ptype = TREE_TYPE (vtype);
2025 return vtype;
2028 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2029 unsigned int pbsize;
2030 if (constant_multiple_p (vbsize, nelts, &pbsize))
2032 /* First check if vec_init optab supports construction from
2033 vector pieces directly. */
2034 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2035 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2036 machine_mode rmode;
2037 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2038 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2039 != CODE_FOR_nothing))
2041 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2042 return vtype;
2045 /* Otherwise check whether an integer type of the same piece size exists
2046 and whether the vec_init optab supports construction from it directly. */
2047 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2048 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2049 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2050 != CODE_FOR_nothing))
2052 *ptype = build_nonstandard_integer_type (pbsize, 1);
2053 return build_vector_type (*ptype, nelts);
2057 return NULL_TREE;
2060 /* A subroutine of get_load_store_type, with a subset of the same
2061 arguments. Handle the case where STMT_INFO is part of a grouped load
2062 or store.
2064 For stores, the statements in the group are all consecutive
2065 and there is no gap at the end. For loads, the statements in the
2066 group might not be consecutive; there can be gaps between statements
2067 as well as at the end. */
2069 static bool
2070 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2071 tree vectype, slp_tree slp_node,
2072 bool masked_p, vec_load_store_type vls_type,
2073 vect_memory_access_type *memory_access_type,
2074 poly_int64 *poffset,
2075 dr_alignment_support *alignment_support_scheme,
2076 int *misalignment,
2077 gather_scatter_info *gs_info)
2079 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2080 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2081 stmt_vec_info first_stmt_info;
2082 unsigned int group_size;
2083 unsigned HOST_WIDE_INT gap;
2084 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2086 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2087 group_size = DR_GROUP_SIZE (first_stmt_info);
2088 gap = DR_GROUP_GAP (first_stmt_info);
2090 else
2092 first_stmt_info = stmt_info;
2093 group_size = 1;
2094 gap = 0;
2096 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2097 bool single_element_p = (stmt_info == first_stmt_info
2098 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2099 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2101 /* True if the vectorized statements would access beyond the last
2102 statement in the group. */
2103 bool overrun_p = false;
2105 /* True if we can cope with such overrun by peeling for gaps, so that
2106 there is at least one final scalar iteration after the vector loop. */
2107 bool can_overrun_p = (!masked_p
2108 && vls_type == VLS_LOAD
2109 && loop_vinfo
2110 && !loop->inner);
2112 /* There can only be a gap at the end of the group if the stride is
2113 known at compile time. */
2114 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2116 /* Stores can't yet have gaps. */
2117 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2119 if (slp_node)
2121 /* For SLP vectorization we directly vectorize a subchain
2122 without permutation. */
2123 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2124 first_dr_info
2125 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2126 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2128 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2129 separated by the stride, until we have a complete vector.
2130 Fall back to scalar accesses if that isn't possible. */
2131 if (multiple_p (nunits, group_size))
2132 *memory_access_type = VMAT_STRIDED_SLP;
2133 else
2134 *memory_access_type = VMAT_ELEMENTWISE;
2136 else
2138 overrun_p = loop_vinfo && gap != 0;
2139 if (overrun_p && vls_type != VLS_LOAD)
2141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2142 "Grouped store with gaps requires"
2143 " non-consecutive accesses\n");
2144 return false;
2146 /* An overrun is fine if the trailing elements are smaller
2147 than the alignment boundary B. Every vector access will
2148 be a multiple of B and so we are guaranteed to access a
2149 non-gap element in the same B-sized block. */
2150 if (overrun_p
2151 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2152 vectype)
2153 / vect_get_scalar_dr_size (first_dr_info)))
2154 overrun_p = false;
2156 /* If the gap splits the vector in half and the target
2157 can do half-vector operations avoid the epilogue peeling
2158 by simply loading half of the vector only. Usually
2159 the construction with an upper zero half will be elided. */
2160 dr_alignment_support alss;
2161 int misalign = dr_misalignment (first_dr_info, vectype);
2162 tree half_vtype;
2163 if (overrun_p
2164 && !masked_p
2165 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2166 vectype, misalign)))
2167 == dr_aligned
2168 || alss == dr_unaligned_supported)
2169 && known_eq (nunits, (group_size - gap) * 2)
2170 && known_eq (nunits, group_size)
2171 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2172 != NULL_TREE))
2173 overrun_p = false;
2175 if (overrun_p && !can_overrun_p)
2177 if (dump_enabled_p ())
2178 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2179 "Peeling for outer loop is not supported\n");
2180 return false;
2182 int cmp = compare_step_with_zero (vinfo, stmt_info);
2183 if (cmp < 0)
2185 if (single_element_p)
2186 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2187 only correct for single element "interleaving" SLP. */
2188 *memory_access_type = get_negative_load_store_type
2189 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2190 else
2192 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2193 separated by the stride, until we have a complete vector.
2194 Fall back to scalar accesses if that isn't possible. */
2195 if (multiple_p (nunits, group_size))
2196 *memory_access_type = VMAT_STRIDED_SLP;
2197 else
2198 *memory_access_type = VMAT_ELEMENTWISE;
2201 else if (cmp == 0 && loop_vinfo)
2203 gcc_assert (vls_type == VLS_LOAD);
2204 *memory_access_type = VMAT_INVARIANT;
2205 /* Invariant accesses perform only component accesses, alignment
2206 is irrelevant for them. */
2207 *alignment_support_scheme = dr_unaligned_supported;
2209 else
2210 *memory_access_type = VMAT_CONTIGUOUS;
2212 /* When we have a contiguous access across loop iterations
2213 but the access in the loop doesn't cover the full vector
2214 we can end up with no gap recorded but still excess
2215 elements accessed, see PR103116. Make sure we peel for
2216 gaps if necessary and sufficient and give up if not.
2217 If there is a combination of the access not covering the full vector and
2218 a gap recorded then we may need to peel twice. */
2219 if (loop_vinfo
2220 && *memory_access_type == VMAT_CONTIGUOUS
2221 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2222 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2223 nunits))
2225 unsigned HOST_WIDE_INT cnunits, cvf;
2226 if (!can_overrun_p
2227 || !nunits.is_constant (&cnunits)
2228 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2229 /* Peeling for gaps assumes that a single scalar iteration
2230 is enough to make sure the last vector iteration doesn't
2231 access excess elements.
2232 ??? Enhancements include peeling multiple iterations
2233 or using masked loads with a static mask. */
2234 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2236 if (dump_enabled_p ())
2237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2238 "peeling for gaps insufficient for "
2239 "access\n");
2240 return false;
2242 overrun_p = true;
2246 else
2248 /* We can always handle this case using elementwise accesses,
2249 but see if something more efficient is available. */
2250 *memory_access_type = VMAT_ELEMENTWISE;
2252 /* If there is a gap at the end of the group then these optimizations
2253 would access excess elements in the last iteration. */
2254 bool would_overrun_p = (gap != 0);
2255 /* An overrun is fine if the trailing elements are smaller than the
2256 alignment boundary B. Every vector access will be a multiple of B
2257 and so we are guaranteed to access a non-gap element in the
2258 same B-sized block. */
2259 if (would_overrun_p
2260 && !masked_p
2261 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2262 / vect_get_scalar_dr_size (first_dr_info)))
2263 would_overrun_p = false;
2265 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2266 && (can_overrun_p || !would_overrun_p)
2267 && compare_step_with_zero (vinfo, stmt_info) > 0)
2269 /* First cope with the degenerate case of a single-element
2270 vector. */
2271 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2274 /* Otherwise try using LOAD/STORE_LANES. */
2275 else if (vls_type == VLS_LOAD
2276 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2277 : vect_store_lanes_supported (vectype, group_size,
2278 masked_p))
2280 *memory_access_type = VMAT_LOAD_STORE_LANES;
2281 overrun_p = would_overrun_p;
2284 /* If that fails, try using permuting loads. */
2285 else if (vls_type == VLS_LOAD
2286 ? vect_grouped_load_supported (vectype, single_element_p,
2287 group_size)
2288 : vect_grouped_store_supported (vectype, group_size))
2290 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2291 overrun_p = would_overrun_p;
2295 /* As a last resort, try using a gather load or scatter store.
2297 ??? Although the code can handle all group sizes correctly,
2298 it probably isn't a win to use separate strided accesses based
2299 on nearby locations. Or, even if it's a win over scalar code,
2300 it might not be a win over vectorizing at a lower VF, if that
2301 allows us to use contiguous accesses. */
2302 if (*memory_access_type == VMAT_ELEMENTWISE
2303 && single_element_p
2304 && loop_vinfo
2305 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2306 masked_p, gs_info))
2307 *memory_access_type = VMAT_GATHER_SCATTER;
2310 if (*memory_access_type == VMAT_GATHER_SCATTER
2311 || *memory_access_type == VMAT_ELEMENTWISE)
2313 *alignment_support_scheme = dr_unaligned_supported;
2314 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2316 else
2318 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2319 *alignment_support_scheme
2320 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2321 *misalignment);
2324 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2326 /* STMT is the leader of the group. Check the operands of all the
2327 stmts of the group. */
2328 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2329 while (next_stmt_info)
2331 tree op = vect_get_store_rhs (next_stmt_info);
2332 enum vect_def_type dt;
2333 if (!vect_is_simple_use (op, vinfo, &dt))
2335 if (dump_enabled_p ())
2336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2337 "use not simple.\n");
2338 return false;
2340 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2344 if (overrun_p)
2346 gcc_assert (can_overrun_p);
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "Data access with gaps requires scalar "
2350 "epilogue loop\n");
2351 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2354 return true;
2357 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2358 if there is a memory access type that the vectorized form can use,
2359 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2360 or scatters, fill in GS_INFO accordingly. In addition
2361 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2362 the target does not support the alignment scheme. *MISALIGNMENT
2363 is set according to the alignment of the access (including
2364 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2366 SLP says whether we're performing SLP rather than loop vectorization.
2367 MASKED_P is true if the statement is conditional on a vectorized mask.
2368 VECTYPE is the vector type that the vectorized statements will use.
2369 NCOPIES is the number of vector statements that will be needed. */
2371 static bool
2372 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2373 tree vectype, slp_tree slp_node,
2374 bool masked_p, vec_load_store_type vls_type,
2375 unsigned int ncopies,
2376 vect_memory_access_type *memory_access_type,
2377 poly_int64 *poffset,
2378 dr_alignment_support *alignment_support_scheme,
2379 int *misalignment,
2380 gather_scatter_info *gs_info)
2382 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2383 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2384 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2385 *poffset = 0;
2386 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2388 *memory_access_type = VMAT_GATHER_SCATTER;
2389 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2390 gcc_unreachable ();
2391 /* When using internal functions, we rely on pattern recognition
2392 to convert the type of the offset to the type that the target
2393 requires, with the result being a call to an internal function.
2394 If that failed for some reason (e.g. because another pattern
2395 took priority), just handle cases in which the offset already
2396 has the right type. */
2397 else if (gs_info->ifn != IFN_LAST
2398 && !is_gimple_call (stmt_info->stmt)
2399 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2400 TREE_TYPE (gs_info->offset_vectype)))
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "%s offset requires a conversion\n",
2405 vls_type == VLS_LOAD ? "gather" : "scatter");
2406 return false;
2408 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2409 &gs_info->offset_dt,
2410 &gs_info->offset_vectype))
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 "%s index use not simple.\n",
2415 vls_type == VLS_LOAD ? "gather" : "scatter");
2416 return false;
2418 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2420 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2421 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2422 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2423 (gs_info->offset_vectype),
2424 TYPE_VECTOR_SUBPARTS (vectype)))
2426 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428 "unsupported vector types for emulated "
2429 "gather.\n");
2430 return false;
2433 /* Gather-scatter accesses perform only component accesses, alignment
2434 is irrelevant for them. */
2435 *alignment_support_scheme = dr_unaligned_supported;
2437 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2439 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2440 masked_p,
2441 vls_type, memory_access_type, poffset,
2442 alignment_support_scheme,
2443 misalignment, gs_info))
2444 return false;
2446 else if (STMT_VINFO_STRIDED_P (stmt_info))
2448 gcc_assert (!slp_node);
2449 if (loop_vinfo
2450 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2451 masked_p, gs_info))
2452 *memory_access_type = VMAT_GATHER_SCATTER;
2453 else
2454 *memory_access_type = VMAT_ELEMENTWISE;
2455 /* Alignment is irrelevant here. */
2456 *alignment_support_scheme = dr_unaligned_supported;
2458 else
2460 int cmp = compare_step_with_zero (vinfo, stmt_info);
2461 if (cmp == 0)
2463 gcc_assert (vls_type == VLS_LOAD);
2464 *memory_access_type = VMAT_INVARIANT;
2465 /* Invariant accesses perform only component accesses, alignment
2466 is irrelevant for them. */
2467 *alignment_support_scheme = dr_unaligned_supported;
2469 else
2471 if (cmp < 0)
2472 *memory_access_type = get_negative_load_store_type
2473 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2474 else
2475 *memory_access_type = VMAT_CONTIGUOUS;
2476 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2477 vectype, *poffset);
2478 *alignment_support_scheme
2479 = vect_supportable_dr_alignment (vinfo,
2480 STMT_VINFO_DR_INFO (stmt_info),
2481 vectype, *misalignment);
2485 if ((*memory_access_type == VMAT_ELEMENTWISE
2486 || *memory_access_type == VMAT_STRIDED_SLP)
2487 && !nunits.is_constant ())
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 "Not using elementwise accesses due to variable "
2492 "vectorization factor.\n");
2493 return false;
2496 if (*alignment_support_scheme == dr_unaligned_unsupported)
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "unsupported unaligned access\n");
2501 return false;
2504 /* FIXME: At the moment the cost model seems to underestimate the
2505 cost of using elementwise accesses. This check preserves the
2506 traditional behavior until that can be fixed. */
2507 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2508 if (!first_stmt_info)
2509 first_stmt_info = stmt_info;
2510 if (*memory_access_type == VMAT_ELEMENTWISE
2511 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2512 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2513 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2514 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "not falling back to elementwise accesses\n");
2519 return false;
2521 return true;
2524 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2525 conditional operation STMT_INFO. When returning true, store the mask
2526 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2527 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2528 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2530 static bool
2531 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2532 slp_tree slp_node, unsigned mask_index,
2533 tree *mask, slp_tree *mask_node,
2534 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2536 enum vect_def_type mask_dt;
2537 tree mask_vectype;
2538 slp_tree mask_node_1;
2539 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2540 mask, &mask_node_1, &mask_dt, &mask_vectype))
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2544 "mask use not simple.\n");
2545 return false;
2548 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2550 if (dump_enabled_p ())
2551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2552 "mask argument is not a boolean.\n");
2553 return false;
2556 /* If the caller is not prepared for adjusting an external/constant
2557 SLP mask vector type fail. */
2558 if (slp_node
2559 && !mask_node
2560 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "SLP mask argument is not vectorized.\n");
2565 return false;
2568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2569 if (!mask_vectype)
2570 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2572 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 "could not find an appropriate vector mask type.\n");
2577 return false;
2580 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2581 TYPE_VECTOR_SUBPARTS (vectype)))
2583 if (dump_enabled_p ())
2584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2585 "vector mask type %T"
2586 " does not match vector data type %T.\n",
2587 mask_vectype, vectype);
2589 return false;
2592 *mask_dt_out = mask_dt;
2593 *mask_vectype_out = mask_vectype;
2594 if (mask_node)
2595 *mask_node = mask_node_1;
2596 return true;
2599 /* Return true if stored value RHS is suitable for vectorizing store
2600 statement STMT_INFO. When returning true, store the type of the
2601 definition in *RHS_DT_OUT, the type of the vectorized store value in
2602 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2604 static bool
2605 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2606 slp_tree slp_node, tree rhs,
2607 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2608 vec_load_store_type *vls_type_out)
2610 /* In the case this is a store from a constant make sure
2611 native_encode_expr can handle it. */
2612 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2614 if (dump_enabled_p ())
2615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2616 "cannot encode constant as a byte sequence.\n");
2617 return false;
2620 unsigned op_no = 0;
2621 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2623 if (gimple_call_internal_p (call)
2624 && internal_store_fn_p (gimple_call_internal_fn (call)))
2625 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2628 enum vect_def_type rhs_dt;
2629 tree rhs_vectype;
2630 slp_tree slp_op;
2631 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2632 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2634 if (dump_enabled_p ())
2635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2636 "use not simple.\n");
2637 return false;
2640 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2641 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2643 if (dump_enabled_p ())
2644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2645 "incompatible vector types.\n");
2646 return false;
2649 *rhs_dt_out = rhs_dt;
2650 *rhs_vectype_out = rhs_vectype;
2651 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2652 *vls_type_out = VLS_STORE_INVARIANT;
2653 else
2654 *vls_type_out = VLS_STORE;
2655 return true;
2658 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2659 Note that we support masks with floating-point type, in which case the
2660 floats are interpreted as a bitmask. */
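/* For example, for a V4SF MASKTYPE this builds a vector of four floats
   whose bit patterns are all ones.  */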
2662 static tree
2663 vect_build_all_ones_mask (vec_info *vinfo,
2664 stmt_vec_info stmt_info, tree masktype)
2666 if (TREE_CODE (masktype) == INTEGER_TYPE)
2667 return build_int_cst (masktype, -1);
2668 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2670 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2671 mask = build_vector_from_val (masktype, mask);
2672 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2674 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2676 REAL_VALUE_TYPE r;
2677 long tmp[6];
2678 for (int j = 0; j < 6; ++j)
2679 tmp[j] = -1;
2680 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2681 tree mask = build_real (TREE_TYPE (masktype), r);
2682 mask = build_vector_from_val (masktype, mask);
2683 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2685 gcc_unreachable ();
2688 /* Build an all-zero merge value of type VECTYPE while vectorizing
2689 STMT_INFO as a gather load. */
2691 static tree
2692 vect_build_zero_merge_argument (vec_info *vinfo,
2693 stmt_vec_info stmt_info, tree vectype)
2695 tree merge;
2696 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2697 merge = build_int_cst (TREE_TYPE (vectype), 0);
2698 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2700 REAL_VALUE_TYPE r;
2701 long tmp[6];
2702 for (int j = 0; j < 6; ++j)
2703 tmp[j] = 0;
2704 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2705 merge = build_real (TREE_TYPE (vectype), r);
2707 else
2708 gcc_unreachable ();
2709 merge = build_vector_from_val (vectype, merge);
2710 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2713 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2714 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2715 the gather load operation. If the load is conditional, MASK is the
2716 unvectorized condition and MASK_DT is its definition type, otherwise
2717 MASK is null. */
2719 static void
2720 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2721 gimple_stmt_iterator *gsi,
2722 gimple **vec_stmt,
2723 gather_scatter_info *gs_info,
2724 tree mask,
2725 stmt_vector_for_cost *cost_vec)
2727 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2728 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2729 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2730 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2731 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2732 edge pe = loop_preheader_edge (loop);
2733 enum { NARROW, NONE, WIDEN } modifier;
2734 poly_uint64 gather_off_nunits
2735 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2737 /* FIXME: This keeps the previous costing from vect_model_load_cost by
2738 costing N scalar loads; it should be tweaked to use target-specific
2739 costs for the gather load calls involved. */
2740 if (cost_vec)
2742 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2743 unsigned int inside_cost;
2744 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2745 scalar_load, stmt_info, 0, vect_body);
2746 if (dump_enabled_p ())
2747 dump_printf_loc (MSG_NOTE, vect_location,
2748 "vect_model_load_cost: inside_cost = %d, "
2749 "prologue_cost = 0 .\n",
2750 inside_cost);
2751 return;
2754 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2755 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2756 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2757 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2758 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2759 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2760 tree scaletype = TREE_VALUE (arglist);
2761 tree real_masktype = masktype;
2762 gcc_checking_assert (types_compatible_p (srctype, rettype)
2763 && (!mask
2764 || TREE_CODE (masktype) == INTEGER_TYPE
2765 || types_compatible_p (srctype, masktype)));
2766 if (mask)
2767 masktype = truth_type_for (srctype);
2769 tree mask_halftype = masktype;
2770 tree perm_mask = NULL_TREE;
2771 tree mask_perm_mask = NULL_TREE;
2772 if (known_eq (nunits, gather_off_nunits))
2773 modifier = NONE;
2774 else if (known_eq (nunits * 2, gather_off_nunits))
2776 modifier = WIDEN;
2778 /* Currently widening gathers and scatters are only supported for
2779 fixed-length vectors. */
2780 int count = gather_off_nunits.to_constant ();
2781 vec_perm_builder sel (count, count, 1);
2782 for (int i = 0; i < count; ++i)
2783 sel.quick_push (i | (count / 2));
2785 vec_perm_indices indices (sel, 1, count);
2786 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2787 indices);
2789 else if (known_eq (nunits, gather_off_nunits * 2))
2791 modifier = NARROW;
2793 /* Currently narrowing gathers and scatters are only supported for
2794 fixed-length vectors. */
2795 int count = nunits.to_constant ();
2796 vec_perm_builder sel (count, count, 1);
2797 sel.quick_grow (count);
2798 for (int i = 0; i < count; ++i)
2799 sel[i] = i < count / 2 ? i : i + count / 2;
2800 vec_perm_indices indices (sel, 2, count);
2801 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2803 ncopies *= 2;
2805 if (mask && VECTOR_TYPE_P (real_masktype))
2807 for (int i = 0; i < count; ++i)
2808 sel[i] = i | (count / 2);
2809 indices.new_vector (sel, 2, count);
2810 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2812 else if (mask)
2813 mask_halftype = truth_type_for (gs_info->offset_vectype);
2815 else
2816 gcc_unreachable ();
2818 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2819 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2821 tree ptr = fold_convert (ptrtype, gs_info->base);
2822 if (!is_gimple_min_invariant (ptr))
2824 gimple_seq seq;
2825 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2826 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2827 gcc_assert (!new_bb);
2830 tree scale = build_int_cst (scaletype, gs_info->scale);
2832 tree vec_oprnd0 = NULL_TREE;
2833 tree vec_mask = NULL_TREE;
2834 tree src_op = NULL_TREE;
2835 tree mask_op = NULL_TREE;
2836 tree prev_res = NULL_TREE;
2838 if (!mask)
2840 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2841 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2844 auto_vec<tree> vec_oprnds0;
2845 auto_vec<tree> vec_masks;
2846 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2847 modifier == WIDEN ? ncopies / 2 : ncopies,
2848 gs_info->offset, &vec_oprnds0);
2849 if (mask)
2850 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2851 modifier == NARROW ? ncopies / 2 : ncopies,
2852 mask, &vec_masks, masktype);
2853 for (int j = 0; j < ncopies; ++j)
2855 tree op, var;
2856 if (modifier == WIDEN && (j & 1))
2857 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2858 perm_mask, stmt_info, gsi);
2859 else
2860 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2862 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2864 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2865 TYPE_VECTOR_SUBPARTS (idxtype)));
2866 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2867 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2868 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2869 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2870 op = var;
2873 if (mask)
2875 if (mask_perm_mask && (j & 1))
2876 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2877 mask_perm_mask, stmt_info, gsi);
2878 else
2880 if (modifier == NARROW)
2882 if ((j & 1) == 0)
2883 vec_mask = vec_masks[j / 2];
2885 else
2886 vec_mask = vec_masks[j];
2888 mask_op = vec_mask;
2889 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2891 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2892 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2893 gcc_assert (known_eq (sub1, sub2));
2894 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2895 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2896 gassign *new_stmt
2897 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2898 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2899 mask_op = var;
2902 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2904 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2905 gassign *new_stmt
2906 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2907 : VEC_UNPACK_LO_EXPR,
2908 mask_op);
2909 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2910 mask_op = var;
2912 src_op = mask_op;
2915 tree mask_arg = mask_op;
2916 if (masktype != real_masktype)
2918 tree utype, optype = TREE_TYPE (mask_op);
2919 if (VECTOR_TYPE_P (real_masktype)
2920 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2921 utype = real_masktype;
2922 else
2923 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2924 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2925 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2926 gassign *new_stmt
2927 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2928 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2929 mask_arg = var;
2930 if (!useless_type_conversion_p (real_masktype, utype))
2932 gcc_assert (TYPE_PRECISION (utype)
2933 <= TYPE_PRECISION (real_masktype));
2934 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2935 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2936 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2937 mask_arg = var;
2939 src_op = build_zero_cst (srctype);
2941 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2942 mask_arg, scale);
2944 if (!useless_type_conversion_p (vectype, rettype))
2946 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2947 TYPE_VECTOR_SUBPARTS (rettype)));
2948 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2949 gimple_call_set_lhs (new_stmt, op);
2950 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2951 var = make_ssa_name (vec_dest);
2952 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2953 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2954 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2956 else
2958 var = make_ssa_name (vec_dest, new_stmt);
2959 gimple_call_set_lhs (new_stmt, var);
2960 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2963 if (modifier == NARROW)
2965 if ((j & 1) == 0)
2967 prev_res = var;
2968 continue;
2970 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2971 stmt_info, gsi);
2972 new_stmt = SSA_NAME_DEF_STMT (var);
2975 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2977 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2980 /* Prepare the base and offset in GS_INFO for vectorization.
2981 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2982 to the vectorized offset argument for the first copy of STMT_INFO.
2983 STMT_INFO is the statement described by GS_INFO and LOOP is the
2984 containing loop. */
2986 static void
2987 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2988 class loop *loop, stmt_vec_info stmt_info,
2989 slp_tree slp_node, gather_scatter_info *gs_info,
2990 tree *dataref_ptr, vec<tree> *vec_offset)
2992 gimple_seq stmts = NULL;
2993 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2994 if (stmts != NULL)
2996 basic_block new_bb;
2997 edge pe = loop_preheader_edge (loop);
2998 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2999 gcc_assert (!new_bb);
3001 if (slp_node)
3002 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3003 else
3005 unsigned ncopies
3006 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3007 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3008 gs_info->offset, vec_offset,
3009 gs_info->offset_vectype);
3013 /* Prepare to implement a grouped or strided load or store using
3014 the gather load or scatter store operation described by GS_INFO.
3015 STMT_INFO is the load or store statement.
3017 Set *DATAREF_BUMP to the amount that should be added to the base
3018 address after each copy of the vectorized statement. Set *VEC_OFFSET
3019 to an invariant offset vector in which element I has the value
3020 I * DR_STEP / SCALE. */
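/* For example, with DR_STEP == 8 and SCALE == 2, *VEC_OFFSET is the series
   { 0, 4, 8, 12, ... }.  */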
3022 static void
3023 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3024 loop_vec_info loop_vinfo,
3025 gimple_stmt_iterator *gsi,
3026 gather_scatter_info *gs_info,
3027 tree *dataref_bump, tree *vec_offset,
3028 vec_loop_lens *loop_lens)
3030 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3031 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3033 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3035 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3036 ivtmp_8 = _31 * 16 (step in bytes);
3037 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3038 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3039 tree loop_len
3040 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
3041 tree tmp
3042 = fold_build2 (MULT_EXPR, sizetype,
3043 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3044 loop_len);
3045 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3046 GSI_SAME_STMT);
3048 else
3050 tree bump
3051 = size_binop (MULT_EXPR,
3052 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3053 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3054 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3057 /* The offset given in GS_INFO can have pointer type, so use the element
3058 type of the vector instead. */
3059 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3061 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3062 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3063 ssize_int (gs_info->scale));
3064 step = fold_convert (offset_type, step);
3066 /* Create {0, X, X*2, X*3, ...}. */
3067 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3068 build_zero_cst (offset_type), step);
3069 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3072 /* Prepare the pointer IVs which need to be updated by a variable amount.
3073 That amount is the result of .SELECT_VL, which allows each iteration
3074 to process a flexible number of elements, as long as the number is
3075 at most VF elements.
3077 Return the data reference increment according to SELECT_VL.
3078 If new statements are needed, insert them before GSI. */
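/* For example, if .SELECT_VL returns 5 for an iteration and the step is
   4 bytes, the pointer IV is bumped by 5 * 4 == 20 bytes.  */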
3080 static tree
3081 vect_get_loop_variant_data_ptr_increment (
3082 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3083 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3084 vect_memory_access_type memory_access_type)
3086 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3087 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3089 /* TODO: We don't yet support gather/scatter or load_lanes/store_lanes for
3090 pointer IVs that are updated by a variable amount, but may in the future. */
3092 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
3093 && memory_access_type != VMAT_LOAD_STORE_LANES);
3095 /* When the SELECT_VL pattern is in use, we adjust the memory address
3096 dynamically by the .SELECT_VL result.
3098 The result of .SELECT_VL is the number of elements to be processed
3099 in each iteration, so the memory address adjustment operation
3100 should be:
3102 addr = addr + .SELECT_VL (ARG..) * step;  */
3104 tree loop_len
3105 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3106 tree len_type = TREE_TYPE (loop_len);
3107 /* The result of .SELECT_VL is a count of elements, so scale it by the
3108 step in bytes before using it to adjust the variable-amount pointer
3109 IVs. */
3110 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3111 wide_int_to_tree (len_type, wi::to_widest (step)));
3112 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3113 gassign *assign = gimple_build_assign (bump, tmp);
3114 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3115 return bump;
3118 /* Return the amount that should be added to a vector pointer to move
3119 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3120 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3121 vectorization. */
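/* For example, for a contiguous access with AGGR_TYPE V4SI the increment
   is 16 bytes, or -16 bytes when the step is negative.  */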
3123 static tree
3124 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3125 dr_vec_info *dr_info, tree aggr_type,
3126 vect_memory_access_type memory_access_type,
3127 vec_loop_lens *loop_lens = nullptr)
3129 if (memory_access_type == VMAT_INVARIANT)
3130 return size_zero_node;
3132 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3133 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3134 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3135 loop_lens, dr_info,
3136 memory_access_type);
3138 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3139 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3140 if (tree_int_cst_sgn (step) == -1)
3141 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3142 return iv_step;
3145 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3147 static bool
3148 vectorizable_bswap (vec_info *vinfo,
3149 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3150 gimple **vec_stmt, slp_tree slp_node,
3151 slp_tree *slp_op,
3152 tree vectype_in, stmt_vector_for_cost *cost_vec)
3154 tree op, vectype;
3155 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3156 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3157 unsigned ncopies;
3159 op = gimple_call_arg (stmt, 0);
3160 vectype = STMT_VINFO_VECTYPE (stmt_info);
3161 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3163 /* Multiple types in SLP are handled by creating the appropriate number of
3164 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3165 case of SLP. */
3166 if (slp_node)
3167 ncopies = 1;
3168 else
3169 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3171 gcc_assert (ncopies >= 1);
3173 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3174 if (! char_vectype)
3175 return false;
3177 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3178 unsigned word_bytes;
3179 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3180 return false;
3182 /* The encoding uses one stepped pattern for each byte in the word. */
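/* E.g. for bswap32 on a 16-byte vector this is the byte permutation
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */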
3183 vec_perm_builder elts (num_bytes, word_bytes, 3);
3184 for (unsigned i = 0; i < 3; ++i)
3185 for (unsigned j = 0; j < word_bytes; ++j)
3186 elts.quick_push ((i + 1) * word_bytes - j - 1);
3188 vec_perm_indices indices (elts, 1, num_bytes);
3189 machine_mode vmode = TYPE_MODE (char_vectype);
3190 if (!can_vec_perm_const_p (vmode, vmode, indices))
3191 return false;
3193 if (! vec_stmt)
3195 if (slp_node
3196 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3198 if (dump_enabled_p ())
3199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3200 "incompatible vector types for invariants\n");
3201 return false;
3204 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3205 DUMP_VECT_SCOPE ("vectorizable_bswap");
3206 record_stmt_cost (cost_vec,
3207 1, vector_stmt, stmt_info, 0, vect_prologue);
3208 record_stmt_cost (cost_vec,
3209 slp_node
3210 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3211 vec_perm, stmt_info, 0, vect_body);
3212 return true;
3215 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3217 /* Transform. */
3218 vec<tree> vec_oprnds = vNULL;
3219 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3220 op, &vec_oprnds);
3221 /* Arguments are ready. Create the new vector stmt. */
3222 unsigned i;
3223 tree vop;
3224 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3226 gimple *new_stmt;
3227 tree tem = make_ssa_name (char_vectype);
3228 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3229 char_vectype, vop));
3230 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3231 tree tem2 = make_ssa_name (char_vectype);
3232 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3233 tem, tem, bswap_vconst);
3234 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3235 tem = make_ssa_name (vectype);
3236 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3237 vectype, tem2));
3238 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3239 if (slp_node)
3240 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3241 else
3242 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3245 if (!slp_node)
3246 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3248 vec_oprnds.release ();
3249 return true;
3252 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3253 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3254 in a single step. On success, store the binary pack code in
3255 *CONVERT_CODE. */
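/* For example, on targets that provide it, narrowing V4SI to V8HI in a
   single step uses the VEC_PACK_TRUNC_EXPR pack code.  */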
3257 static bool
3258 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3259 code_helper *convert_code)
3261 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3262 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3263 return false;
3265 code_helper code;
3266 int multi_step_cvt = 0;
3267 auto_vec <tree, 8> interm_types;
3268 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3269 &code, &multi_step_cvt, &interm_types)
3270 || multi_step_cvt)
3271 return false;
3273 *convert_code = code;
3274 return true;
3277 /* Function vectorizable_call.
3279 Check if STMT_INFO performs a function call that can be vectorized.
3280 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3281 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3282 Return true if STMT_INFO is vectorizable in this way. */
3284 static bool
3285 vectorizable_call (vec_info *vinfo,
3286 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3287 gimple **vec_stmt, slp_tree slp_node,
3288 stmt_vector_for_cost *cost_vec)
3290 gcall *stmt;
3291 tree vec_dest;
3292 tree scalar_dest;
3293 tree op;
3294 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3295 tree vectype_out, vectype_in;
3296 poly_uint64 nunits_in;
3297 poly_uint64 nunits_out;
3298 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3299 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3300 tree fndecl, new_temp, rhs_type;
3301 enum vect_def_type dt[4]
3302 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3303 vect_unknown_def_type };
3304 tree vectypes[ARRAY_SIZE (dt)] = {};
3305 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3306 int ndts = ARRAY_SIZE (dt);
3307 int ncopies, j;
3308 auto_vec<tree, 8> vargs;
3309 enum { NARROW, NONE, WIDEN } modifier;
3310 size_t i, nargs;
3311 tree lhs;
3313 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3314 return false;
3316 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3317 && ! vec_stmt)
3318 return false;
3320 /* Is STMT_INFO a vectorizable call? */
3321 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3322 if (!stmt)
3323 return false;
3325 if (gimple_call_internal_p (stmt)
3326 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3327 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3328 /* Handled by vectorizable_load and vectorizable_store. */
3329 return false;
3331 if (gimple_call_lhs (stmt) == NULL_TREE
3332 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3333 return false;
3335 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3337 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3339 /* Process function arguments. */
3340 rhs_type = NULL_TREE;
3341 vectype_in = NULL_TREE;
3342 nargs = gimple_call_num_args (stmt);
3344 /* Bail out if the function has more than four arguments; we do not have
3345 interesting builtin functions to vectorize with more than two arguments
3346 except for fma. Zero arguments is not supported either. */
3347 if (nargs == 0 || nargs > 4)
3348 return false;
3350 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3351 combined_fn cfn = gimple_call_combined_fn (stmt);
3352 if (cfn == CFN_GOMP_SIMD_LANE)
3354 nargs = 0;
3355 rhs_type = unsigned_type_node;
3358 int mask_opno = -1;
3359 if (internal_fn_p (cfn))
3360 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3362 for (i = 0; i < nargs; i++)
3364 if ((int) i == mask_opno)
3366 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3367 &op, &slp_op[i], &dt[i], &vectypes[i]))
3368 return false;
3369 continue;
3372 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3373 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3375 if (dump_enabled_p ())
3376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3377 "use not simple.\n");
3378 return false;
3381 /* We can only handle calls with arguments of the same type. */
3382 if (rhs_type
3383 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3387 "argument types differ.\n");
3388 return false;
3390 if (!rhs_type)
3391 rhs_type = TREE_TYPE (op);
3393 if (!vectype_in)
3394 vectype_in = vectypes[i];
3395 else if (vectypes[i]
3396 && !types_compatible_p (vectypes[i], vectype_in))
3398 if (dump_enabled_p ())
3399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3400 "argument vector types differ.\n");
3401 return false;
3404 /* If all arguments are external or constant defs, infer the vector type
3405 from the scalar type. */
3406 if (!vectype_in)
3407 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3408 if (vec_stmt)
3409 gcc_assert (vectype_in);
3410 if (!vectype_in)
3412 if (dump_enabled_p ())
3413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3414 "no vectype for scalar type %T\n", rhs_type);
3416 return false;
3418 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3419 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3420 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3421 by a pack of the two vectors into an SI vector. We would need
3422 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3423 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3425 if (dump_enabled_p ())
3426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3427 "mismatched vector sizes %T and %T\n",
3428 vectype_in, vectype_out);
3429 return false;
3432 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3433 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3435 if (dump_enabled_p ())
3436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3437 "mixed mask and nonmask vector types\n");
3438 return false;
3441 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3443 if (dump_enabled_p ())
3444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3445 "use emulated vector type for call\n");
3446 return false;
3449 /* FORNOW */
3450 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3451 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3452 if (known_eq (nunits_in * 2, nunits_out))
3453 modifier = NARROW;
3454 else if (known_eq (nunits_out, nunits_in))
3455 modifier = NONE;
3456 else if (known_eq (nunits_out * 2, nunits_in))
3457 modifier = WIDEN;
3458 else
3459 return false;
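/* E.g. with 128-bit vectors, V2DI in and V4SI out gives NARROW (the
   results of two calls are packed into one output vector), V4SI in and
   V4SI out gives NONE, and V4SI in and V2DI out gives WIDEN.  */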
3461 /* We only handle functions that do not read or clobber memory. */
3462 if (gimple_vuse (stmt))
3464 if (dump_enabled_p ())
3465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3466 "function reads from or writes to memory.\n");
3467 return false;
3470 /* For now, we only vectorize functions if a target specific builtin
3471 is available. TODO -- in some cases, it might be profitable to
3472 insert the calls for pieces of the vector, in order to be able
3473 to vectorize other operations in the loop. */
3474 fndecl = NULL_TREE;
3475 internal_fn ifn = IFN_LAST;
3476 tree callee = gimple_call_fndecl (stmt);
3478 /* First try using an internal function. */
3479 code_helper convert_code = MAX_TREE_CODES;
3480 if (cfn != CFN_LAST
3481 && (modifier == NONE
3482 || (modifier == NARROW
3483 && simple_integer_narrowing (vectype_out, vectype_in,
3484 &convert_code))))
3485 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3486 vectype_in);
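/* In the NARROW case the internal function is applied to full-width
   input vectors, producing VECTYPE_IN-sized partial results; pairs of
   those are then combined with CONVERT_CODE (typically a pack) into each
   VECTYPE_OUT vector in the transform code below.  */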
3488 /* If that fails, try asking for a target-specific built-in function. */
3489 if (ifn == IFN_LAST)
3491 if (cfn != CFN_LAST)
3492 fndecl = targetm.vectorize.builtin_vectorized_function
3493 (cfn, vectype_out, vectype_in);
3494 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3495 fndecl = targetm.vectorize.builtin_md_vectorized_function
3496 (callee, vectype_out, vectype_in);
3499 if (ifn == IFN_LAST && !fndecl)
3501 if (cfn == CFN_GOMP_SIMD_LANE
3502 && !slp_node
3503 && loop_vinfo
3504 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3505 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3506 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3507 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3509 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3510 { 0, 1, 2, ... vf - 1 } vector. */
3511 gcc_assert (nargs == 0);
3513 else if (modifier == NONE
3514 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3515 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3516 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3517 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3518 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3519 slp_op, vectype_in, cost_vec);
3520 else
3522 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3524 "function is not vectorizable.\n");
3525 return false;
3529 if (slp_node)
3530 ncopies = 1;
3531 else if (modifier == NARROW && ifn == IFN_LAST)
3532 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3533 else
3534 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3536 /* Sanity check: make sure that at least one copy of the vectorized stmt
3537 needs to be generated. */
3538 gcc_assert (ncopies >= 1);
3540 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3541 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3542 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3543 if (!vec_stmt) /* transformation not required. */
3545 if (slp_node)
3546 for (i = 0; i < nargs; ++i)
3547 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3548 vectypes[i]
3549 ? vectypes[i] : vectype_in))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3553 "incompatible vector types for invariants\n");
3554 return false;
3556 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3557 DUMP_VECT_SCOPE ("vectorizable_call");
3558 vect_model_simple_cost (vinfo, stmt_info,
3559 ncopies, dt, ndts, slp_node, cost_vec);
3560 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3561 record_stmt_cost (cost_vec, ncopies / 2,
3562 vec_promote_demote, stmt_info, 0, vect_body);
3564 if (loop_vinfo
3565 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3566 && (reduc_idx >= 0 || mask_opno >= 0))
3568 if (reduc_idx >= 0
3569 && (cond_fn == IFN_LAST
3570 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3571 OPTIMIZE_FOR_SPEED)))
3573 if (dump_enabled_p ())
3574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3575 "can't use a fully-masked loop because no"
3576 " conditional operation is available.\n");
3577 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3579 else
3581 unsigned int nvectors
3582 = (slp_node
3583 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3584 : ncopies);
3585 tree scalar_mask = NULL_TREE;
3586 if (mask_opno >= 0)
3587 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3588 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3589 vectype_out, scalar_mask);
3592 return true;
3595 /* Transform. */
3597 if (dump_enabled_p ())
3598 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3600 /* Handle def. */
3601 scalar_dest = gimple_call_lhs (stmt);
3602 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3604 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3605 unsigned int vect_nargs = nargs;
3606 if (masked_loop_p && reduc_idx >= 0)
3608 ifn = cond_fn;
3609 vect_nargs += 2;
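/* The conditional internal function takes a leading loop mask and a
   trailing "else" value (here the reduction input, which is passed
   through for masked-off lanes); that is why VECT_NARGS is bumped by two
   here and the extra operands are pushed around the original arguments
   below.  */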
3612 if (modifier == NONE || ifn != IFN_LAST)
3614 tree prev_res = NULL_TREE;
3615 vargs.safe_grow (vect_nargs, true);
3616 auto_vec<vec<tree> > vec_defs (nargs);
3617 for (j = 0; j < ncopies; ++j)
3619 /* Build argument list for the vectorized call. */
3620 if (slp_node)
3622 vec<tree> vec_oprnds0;
3624 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3625 vec_oprnds0 = vec_defs[0];
3627 /* Arguments are ready. Create the new vector stmt. */
3628 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3630 int varg = 0;
3631 if (masked_loop_p && reduc_idx >= 0)
3633 unsigned int vec_num = vec_oprnds0.length ();
3634 /* Always true for SLP. */
3635 gcc_assert (ncopies == 1);
3636 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3637 gsi, masks, vec_num,
3638 vectype_out, i);
3640 size_t k;
3641 for (k = 0; k < nargs; k++)
3643 vec<tree> vec_oprndsk = vec_defs[k];
3644 vargs[varg++] = vec_oprndsk[i];
3646 if (masked_loop_p && reduc_idx >= 0)
3647 vargs[varg++] = vargs[reduc_idx + 1];
3648 gimple *new_stmt;
3649 if (modifier == NARROW)
3651 /* We don't define any narrowing conditional functions
3652 at present. */
3653 gcc_assert (mask_opno < 0);
3654 tree half_res = make_ssa_name (vectype_in);
3655 gcall *call
3656 = gimple_build_call_internal_vec (ifn, vargs);
3657 gimple_call_set_lhs (call, half_res);
3658 gimple_call_set_nothrow (call, true);
3659 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3660 if ((i & 1) == 0)
3662 prev_res = half_res;
3663 continue;
3665 new_temp = make_ssa_name (vec_dest);
3666 new_stmt = vect_gimple_build (new_temp, convert_code,
3667 prev_res, half_res);
3668 vect_finish_stmt_generation (vinfo, stmt_info,
3669 new_stmt, gsi);
3671 else
3673 if (mask_opno >= 0 && masked_loop_p)
3675 unsigned int vec_num = vec_oprnds0.length ();
3676 /* Always true for SLP. */
3677 gcc_assert (ncopies == 1);
3678 tree mask = vect_get_loop_mask (loop_vinfo,
3679 gsi, masks, vec_num,
3680 vectype_out, i);
3681 vargs[mask_opno] = prepare_vec_mask
3682 (loop_vinfo, TREE_TYPE (mask), mask,
3683 vargs[mask_opno], gsi);
3686 gcall *call;
3687 if (ifn != IFN_LAST)
3688 call = gimple_build_call_internal_vec (ifn, vargs);
3689 else
3690 call = gimple_build_call_vec (fndecl, vargs);
3691 new_temp = make_ssa_name (vec_dest, call);
3692 gimple_call_set_lhs (call, new_temp);
3693 gimple_call_set_nothrow (call, true);
3694 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3695 new_stmt = call;
3697 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3699 continue;
3702 int varg = 0;
3703 if (masked_loop_p && reduc_idx >= 0)
3704 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3705 vectype_out, j);
3706 for (i = 0; i < nargs; i++)
3708 op = gimple_call_arg (stmt, i);
3709 if (j == 0)
3711 vec_defs.quick_push (vNULL);
3712 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3713 op, &vec_defs[i],
3714 vectypes[i]);
3716 vargs[varg++] = vec_defs[i][j];
3718 if (masked_loop_p && reduc_idx >= 0)
3719 vargs[varg++] = vargs[reduc_idx + 1];
3721 if (mask_opno >= 0 && masked_loop_p)
3723 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3724 vectype_out, j);
3725 vargs[mask_opno]
3726 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3727 vargs[mask_opno], gsi);
3730 gimple *new_stmt;
3731 if (cfn == CFN_GOMP_SIMD_LANE)
3733 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3734 tree new_var
3735 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3736 gimple *init_stmt = gimple_build_assign (new_var, cst);
3737 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3738 new_temp = make_ssa_name (vec_dest);
3739 new_stmt = gimple_build_assign (new_temp, new_var);
3740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3742 else if (modifier == NARROW)
3744 /* We don't define any narrowing conditional functions at
3745 present. */
3746 gcc_assert (mask_opno < 0);
3747 tree half_res = make_ssa_name (vectype_in);
3748 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3749 gimple_call_set_lhs (call, half_res);
3750 gimple_call_set_nothrow (call, true);
3751 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3752 if ((j & 1) == 0)
3754 prev_res = half_res;
3755 continue;
3757 new_temp = make_ssa_name (vec_dest);
3758 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3759 half_res);
3760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3762 else
3764 gcall *call;
3765 if (ifn != IFN_LAST)
3766 call = gimple_build_call_internal_vec (ifn, vargs);
3767 else
3768 call = gimple_build_call_vec (fndecl, vargs);
3769 new_temp = make_ssa_name (vec_dest, call);
3770 gimple_call_set_lhs (call, new_temp);
3771 gimple_call_set_nothrow (call, true);
3772 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3773 new_stmt = call;
3776 if (j == (modifier == NARROW ? 1 : 0))
3777 *vec_stmt = new_stmt;
3778 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3780 for (i = 0; i < nargs; i++)
3782 vec<tree> vec_oprndsi = vec_defs[i];
3783 vec_oprndsi.release ();
3786 else if (modifier == NARROW)
3788 auto_vec<vec<tree> > vec_defs (nargs);
3789 /* We don't define any narrowing conditional functions at present. */
3790 gcc_assert (mask_opno < 0);
3791 for (j = 0; j < ncopies; ++j)
3793 /* Build argument list for the vectorized call. */
3794 if (j == 0)
3795 vargs.create (nargs * 2);
3796 else
3797 vargs.truncate (0);
3799 if (slp_node)
3801 vec<tree> vec_oprnds0;
3803 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3804 vec_oprnds0 = vec_defs[0];
3806 /* Arguments are ready. Create the new vector stmt. */
3807 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3809 size_t k;
3810 vargs.truncate (0);
3811 for (k = 0; k < nargs; k++)
3813 vec<tree> vec_oprndsk = vec_defs[k];
3814 vargs.quick_push (vec_oprndsk[i]);
3815 vargs.quick_push (vec_oprndsk[i + 1]);
3817 gcall *call;
3818 if (ifn != IFN_LAST)
3819 call = gimple_build_call_internal_vec (ifn, vargs);
3820 else
3821 call = gimple_build_call_vec (fndecl, vargs);
3822 new_temp = make_ssa_name (vec_dest, call);
3823 gimple_call_set_lhs (call, new_temp);
3824 gimple_call_set_nothrow (call, true);
3825 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3826 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3828 continue;
3831 for (i = 0; i < nargs; i++)
3833 op = gimple_call_arg (stmt, i);
3834 if (j == 0)
3836 vec_defs.quick_push (vNULL);
3837 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3838 op, &vec_defs[i], vectypes[i]);
3840 vec_oprnd0 = vec_defs[i][2*j];
3841 vec_oprnd1 = vec_defs[i][2*j+1];
3843 vargs.quick_push (vec_oprnd0);
3844 vargs.quick_push (vec_oprnd1);
3847 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3848 new_temp = make_ssa_name (vec_dest, new_stmt);
3849 gimple_call_set_lhs (new_stmt, new_temp);
3850 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3852 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3855 if (!slp_node)
3856 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3858 for (i = 0; i < nargs; i++)
3860 vec<tree> vec_oprndsi = vec_defs[i];
3861 vec_oprndsi.release ();
3864 else
3865 /* No current target implements this case. */
3866 return false;
3868 vargs.release ();
3870 /* The call in STMT might prevent it from being removed in dce.
3871 We cannot remove it here, however, because of the way the ssa name
3872 it defines is mapped to the new definition. So just replace the
3873 rhs of the statement with something harmless. */
3875 if (slp_node)
3876 return true;
3878 stmt_info = vect_orig_stmt (stmt_info);
3879 lhs = gimple_get_lhs (stmt_info->stmt);
3881 gassign *new_stmt
3882 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3883 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3885 return true;
3889 struct simd_call_arg_info
3891 tree vectype;
3892 tree op;
3893 HOST_WIDE_INT linear_step;
3894 enum vect_def_type dt;
3895 unsigned int align;
3896 bool simd_lane_linear;
3899 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3900 is linear within a simd lane (but not within the whole loop), note it in
3901 *ARGINFO. */
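/* E.g. a sequence roughly of the form
     _1 = .GOMP_SIMD_LANE (simduid.0_5);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &array + _3;
   is recognized here: P_4 advances by 4 bytes per SIMD lane, so *ARGINFO
   records base &array and linear_step 4 (the names above are only
   illustrative).  */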
3903 static void
3904 vect_simd_lane_linear (tree op, class loop *loop,
3905 struct simd_call_arg_info *arginfo)
3907 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3909 if (!is_gimple_assign (def_stmt)
3910 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3911 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3912 return;
3914 tree base = gimple_assign_rhs1 (def_stmt);
3915 HOST_WIDE_INT linear_step = 0;
3916 tree v = gimple_assign_rhs2 (def_stmt);
3917 while (TREE_CODE (v) == SSA_NAME)
3919 tree t;
3920 def_stmt = SSA_NAME_DEF_STMT (v);
3921 if (is_gimple_assign (def_stmt))
3922 switch (gimple_assign_rhs_code (def_stmt))
3924 case PLUS_EXPR:
3925 t = gimple_assign_rhs2 (def_stmt);
3926 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3927 return;
3928 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3929 v = gimple_assign_rhs1 (def_stmt);
3930 continue;
3931 case MULT_EXPR:
3932 t = gimple_assign_rhs2 (def_stmt);
3933 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3934 return;
3935 linear_step = tree_to_shwi (t);
3936 v = gimple_assign_rhs1 (def_stmt);
3937 continue;
3938 CASE_CONVERT:
3939 t = gimple_assign_rhs1 (def_stmt);
3940 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3941 || (TYPE_PRECISION (TREE_TYPE (v))
3942 < TYPE_PRECISION (TREE_TYPE (t))))
3943 return;
3944 if (!linear_step)
3945 linear_step = 1;
3946 v = t;
3947 continue;
3948 default:
3949 return;
3951 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3952 && loop->simduid
3953 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3954 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3955 == loop->simduid))
3957 if (!linear_step)
3958 linear_step = 1;
3959 arginfo->linear_step = linear_step;
3960 arginfo->op = base;
3961 arginfo->simd_lane_linear = true;
3962 return;
3967 /* Return the number of elements in vector type VECTYPE, which is associated
3968 with a SIMD clone. At present these vectors always have a constant
3969 length. */
3971 static unsigned HOST_WIDE_INT
3972 simd_clone_subparts (tree vectype)
3974 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3977 /* Function vectorizable_simd_clone_call.
3979 Check if STMT_INFO performs a function call that can be vectorized
3980 by calling a simd clone of the function.
3981 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3982 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3983 Return true if STMT_INFO is vectorizable in this way. */
3985 static bool
3986 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3987 gimple_stmt_iterator *gsi,
3988 gimple **vec_stmt, slp_tree slp_node,
3989 stmt_vector_for_cost *)
3991 tree vec_dest;
3992 tree scalar_dest;
3993 tree op, type;
3994 tree vec_oprnd0 = NULL_TREE;
3995 tree vectype;
3996 poly_uint64 nunits;
3997 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3998 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3999 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4000 tree fndecl, new_temp;
4001 int ncopies, j;
4002 auto_vec<simd_call_arg_info> arginfo;
4003 vec<tree> vargs = vNULL;
4004 size_t i, nargs;
4005 tree lhs, rtype, ratype;
4006 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4007 int arg_offset = 0;
4009 /* Is STMT a vectorizable call? */
4010 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4011 if (!stmt)
4012 return false;
4014 fndecl = gimple_call_fndecl (stmt);
4015 if (fndecl == NULL_TREE
4016 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4018 fndecl = gimple_call_arg (stmt, 0);
4019 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4020 fndecl = TREE_OPERAND (fndecl, 0);
4021 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4022 arg_offset = 1;
4024 if (fndecl == NULL_TREE)
4025 return false;
4027 struct cgraph_node *node = cgraph_node::get (fndecl);
4028 if (node == NULL || node->simd_clones == NULL)
4029 return false;
4031 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4032 return false;
4034 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4035 && ! vec_stmt)
4036 return false;
4038 if (gimple_call_lhs (stmt)
4039 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4040 return false;
4042 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4044 vectype = STMT_VINFO_VECTYPE (stmt_info);
4046 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4047 return false;
4049 /* FORNOW */
4050 if (slp_node)
4051 return false;
4053 /* Process function arguments. */
4054 nargs = gimple_call_num_args (stmt) - arg_offset;
4056 /* Bail out if the function has zero arguments. */
4057 if (nargs == 0)
4058 return false;
4060 arginfo.reserve (nargs, true);
4062 for (i = 0; i < nargs; i++)
4064 simd_call_arg_info thisarginfo;
4065 affine_iv iv;
4067 thisarginfo.linear_step = 0;
4068 thisarginfo.align = 0;
4069 thisarginfo.op = NULL_TREE;
4070 thisarginfo.simd_lane_linear = false;
4072 op = gimple_call_arg (stmt, i + arg_offset);
4073 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4074 &thisarginfo.vectype)
4075 || thisarginfo.dt == vect_uninitialized_def)
4077 if (dump_enabled_p ())
4078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4079 "use not simple.\n");
4080 return false;
4083 if (thisarginfo.dt == vect_constant_def
4084 || thisarginfo.dt == vect_external_def)
4085 gcc_assert (thisarginfo.vectype == NULL_TREE);
4086 else
4087 gcc_assert (thisarginfo.vectype != NULL_TREE);
4089 /* For linear arguments, the analyze phase should have saved
4090 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4091 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4092 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4094 gcc_assert (vec_stmt);
4095 thisarginfo.linear_step
4096 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4097 thisarginfo.op
4098 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4099 thisarginfo.simd_lane_linear
4100 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4101 == boolean_true_node);
4102 /* If the loop has been peeled for alignment, we need to adjust the saved base accordingly. */
4103 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4104 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4105 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4107 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4108 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4109 tree opt = TREE_TYPE (thisarginfo.op);
4110 bias = fold_convert (TREE_TYPE (step), bias);
4111 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4112 thisarginfo.op
4113 = fold_build2 (POINTER_TYPE_P (opt)
4114 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4115 thisarginfo.op, bias);
4118 else if (!vec_stmt
4119 && thisarginfo.dt != vect_constant_def
4120 && thisarginfo.dt != vect_external_def
4121 && loop_vinfo
4122 && TREE_CODE (op) == SSA_NAME
4123 && simple_iv (loop, loop_containing_stmt (stmt), op,
4124 &iv, false)
4125 && tree_fits_shwi_p (iv.step))
4127 thisarginfo.linear_step = tree_to_shwi (iv.step);
4128 thisarginfo.op = iv.base;
4130 else if ((thisarginfo.dt == vect_constant_def
4131 || thisarginfo.dt == vect_external_def)
4132 && POINTER_TYPE_P (TREE_TYPE (op)))
4133 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4134 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4135 linear too. */
4136 if (POINTER_TYPE_P (TREE_TYPE (op))
4137 && !thisarginfo.linear_step
4138 && !vec_stmt
4139 && thisarginfo.dt != vect_constant_def
4140 && thisarginfo.dt != vect_external_def
4141 && loop_vinfo
4142 && !slp_node
4143 && TREE_CODE (op) == SSA_NAME)
4144 vect_simd_lane_linear (op, loop, &thisarginfo);
4146 arginfo.quick_push (thisarginfo);
4149 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4150 if (!vf.is_constant ())
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4154 "not considering SIMD clones; not yet supported"
4155 " for variable-width vectors.\n");
4156 return false;
4159 unsigned int badness = 0;
4160 struct cgraph_node *bestn = NULL;
4161 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4162 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4163 else
4164 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4165 n = n->simdclone->next_clone)
4167 unsigned int this_badness = 0;
4168 unsigned int num_calls;
4169 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4170 || n->simdclone->nargs != nargs)
4171 continue;
4172 if (num_calls != 1)
4173 this_badness += exact_log2 (num_calls) * 4096;
4174 if (n->simdclone->inbranch)
4175 this_badness += 8192;
4176 int target_badness = targetm.simd_clone.usable (n);
4177 if (target_badness < 0)
4178 continue;
4179 this_badness += target_badness * 512;
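/* For instance, with vf == 8 an unmasked clone with simdlen 8 starts at
   badness 0, while a simdlen-4 clone needs two calls and starts at
   exact_log2 (2) * 4096 == 4096; an in-branch (masked) clone is penalized
   by a further 8192.  The clone with the lowest badness is chosen below.  */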
4180 for (i = 0; i < nargs; i++)
4182 switch (n->simdclone->args[i].arg_type)
4184 case SIMD_CLONE_ARG_TYPE_VECTOR:
4185 if (!useless_type_conversion_p
4186 (n->simdclone->args[i].orig_type,
4187 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4188 i = -1;
4189 else if (arginfo[i].dt == vect_constant_def
4190 || arginfo[i].dt == vect_external_def
4191 || arginfo[i].linear_step)
4192 this_badness += 64;
4193 break;
4194 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4195 if (arginfo[i].dt != vect_constant_def
4196 && arginfo[i].dt != vect_external_def)
4197 i = -1;
4198 break;
4199 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4200 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4201 if (arginfo[i].dt == vect_constant_def
4202 || arginfo[i].dt == vect_external_def
4203 || (arginfo[i].linear_step
4204 != n->simdclone->args[i].linear_step))
4205 i = -1;
4206 break;
4207 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4208 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4209 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4213 /* FORNOW */
4214 i = -1;
4215 break;
4216 case SIMD_CLONE_ARG_TYPE_MASK:
4217 break;
4219 if (i == (size_t) -1)
4220 break;
4221 if (n->simdclone->args[i].alignment > arginfo[i].align)
4223 i = -1;
4224 break;
4226 if (arginfo[i].align)
4227 this_badness += (exact_log2 (arginfo[i].align)
4228 - exact_log2 (n->simdclone->args[i].alignment));
4230 if (i == (size_t) -1)
4231 continue;
4232 if (bestn == NULL || this_badness < badness)
4234 bestn = n;
4235 badness = this_badness;
4239 if (bestn == NULL)
4240 return false;
4242 for (i = 0; i < nargs; i++)
4244 if ((arginfo[i].dt == vect_constant_def
4245 || arginfo[i].dt == vect_external_def)
4246 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4248 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4249 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4250 slp_node);
4251 if (arginfo[i].vectype == NULL
4252 || !constant_multiple_p (bestn->simdclone->simdlen,
4253 simd_clone_subparts (arginfo[i].vectype)))
4254 return false;
4257 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4258 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4260 if (dump_enabled_p ())
4261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4262 "vector mask arguments are not supported.\n");
4263 return false;
4266 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4267 && bestn->simdclone->mask_mode == VOIDmode
4268 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4269 != simd_clone_subparts (arginfo[i].vectype)))
4271 /* FORNOW we only have partial support for vector-type masks that
4272 can't hold all of simdlen. */
4273 if (dump_enabled_p ())
4274 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4275 vect_location,
4276 "in-branch vector clones are not yet"
4277 " supported for mismatched vector sizes.\n");
4278 return false;
4280 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4281 && bestn->simdclone->mask_mode != VOIDmode)
4283 /* FORNOW don't support integer-type masks. */
4284 if (dump_enabled_p ())
4285 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4286 vect_location,
4287 "in-branch vector clones are not yet"
4288 " supported for integer mask modes.\n");
4289 return false;
4293 fndecl = bestn->decl;
4294 nunits = bestn->simdclone->simdlen;
4295 ncopies = vector_unroll_factor (vf, nunits);
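/* E.g. for vf == 16 and a chosen clone with simdlen 4, ncopies == 4 calls
   to the clone are emitted per vectorized iteration.  */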
4297 /* If the function isn't const, only allow it in simd loops where the user
4298 has asserted that at least nunits consecutive iterations can be
4299 performed using SIMD instructions. */
4300 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4301 && gimple_vuse (stmt))
4302 return false;
4304 /* Sanity check: make sure that at least one copy of the vectorized stmt
4305 needs to be generated. */
4306 gcc_assert (ncopies >= 1);
4308 if (!vec_stmt) /* transformation not required. */
4310 /* When the original call is pure or const but the SIMD ABI dictates
4311 an aggregate return, we will have to use a virtual definition and
4312 in a loop eventually even need to add a virtual PHI. That is
4313 not straightforward, so allow this to be fixed up via renaming. */
4314 if (gimple_call_lhs (stmt)
4315 && !gimple_vdef (stmt)
4316 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4317 vinfo->any_known_not_updated_vssa = true;
4318 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4319 for (i = 0; i < nargs; i++)
4320 if ((bestn->simdclone->args[i].arg_type
4321 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4322 || (bestn->simdclone->args[i].arg_type
4323 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4325 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4326 + 1,
4327 true);
4328 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4329 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4330 ? size_type_node : TREE_TYPE (arginfo[i].op);
4331 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4332 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4333 tree sll = arginfo[i].simd_lane_linear
4334 ? boolean_true_node : boolean_false_node;
4335 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4337 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4338 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4339 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4340 dt, slp_node, cost_vec); */
4341 return true;
4344 /* Transform. */
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4349 /* Handle def. */
4350 scalar_dest = gimple_call_lhs (stmt);
4351 vec_dest = NULL_TREE;
4352 rtype = NULL_TREE;
4353 ratype = NULL_TREE;
4354 if (scalar_dest)
4356 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4357 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4358 if (TREE_CODE (rtype) == ARRAY_TYPE)
4360 ratype = rtype;
4361 rtype = TREE_TYPE (ratype);
4365 auto_vec<vec<tree> > vec_oprnds;
4366 auto_vec<unsigned> vec_oprnds_i;
4367 vec_oprnds.safe_grow_cleared (nargs, true);
4368 vec_oprnds_i.safe_grow_cleared (nargs, true);
4369 for (j = 0; j < ncopies; ++j)
4371 /* Build argument list for the vectorized call. */
4372 if (j == 0)
4373 vargs.create (nargs);
4374 else
4375 vargs.truncate (0);
4377 for (i = 0; i < nargs; i++)
4379 unsigned int k, l, m, o;
4380 tree atype;
4381 op = gimple_call_arg (stmt, i + arg_offset);
4382 switch (bestn->simdclone->args[i].arg_type)
4384 case SIMD_CLONE_ARG_TYPE_VECTOR:
4385 atype = bestn->simdclone->args[i].vector_type;
4386 o = vector_unroll_factor (nunits,
4387 simd_clone_subparts (atype));
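/* Two sub-cases follow: if the clone's argument type ATYPE has fewer lanes
   than the loop's vector type for this argument, each vector def is split
   into pieces with BIT_FIELD_REFs; if it has more lanes, K consecutive
   defs are combined with a CONSTRUCTOR (or view-converted when K == 1).  */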
4388 for (m = j * o; m < (j + 1) * o; m++)
4390 if (simd_clone_subparts (atype)
4391 < simd_clone_subparts (arginfo[i].vectype))
4393 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4394 k = (simd_clone_subparts (arginfo[i].vectype)
4395 / simd_clone_subparts (atype));
4396 gcc_assert ((k & (k - 1)) == 0);
4397 if (m == 0)
4399 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4400 ncopies * o / k, op,
4401 &vec_oprnds[i]);
4402 vec_oprnds_i[i] = 0;
4403 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4405 else
4407 vec_oprnd0 = arginfo[i].op;
4408 if ((m & (k - 1)) == 0)
4409 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4411 arginfo[i].op = vec_oprnd0;
4412 vec_oprnd0
4413 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4414 bitsize_int (prec),
4415 bitsize_int ((m & (k - 1)) * prec));
4416 gassign *new_stmt
4417 = gimple_build_assign (make_ssa_name (atype),
4418 vec_oprnd0);
4419 vect_finish_stmt_generation (vinfo, stmt_info,
4420 new_stmt, gsi);
4421 vargs.safe_push (gimple_assign_lhs (new_stmt));
4423 else
4425 k = (simd_clone_subparts (atype)
4426 / simd_clone_subparts (arginfo[i].vectype));
4427 gcc_assert ((k & (k - 1)) == 0);
4428 vec<constructor_elt, va_gc> *ctor_elts;
4429 if (k != 1)
4430 vec_alloc (ctor_elts, k);
4431 else
4432 ctor_elts = NULL;
4433 for (l = 0; l < k; l++)
4435 if (m == 0 && l == 0)
4437 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4438 k * o * ncopies,
4439 op,
4440 &vec_oprnds[i]);
4441 vec_oprnds_i[i] = 0;
4442 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4444 else
4445 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4446 arginfo[i].op = vec_oprnd0;
4447 if (k == 1)
4448 break;
4449 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4450 vec_oprnd0);
4452 if (k == 1)
4453 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4454 atype))
4456 vec_oprnd0
4457 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4458 gassign *new_stmt
4459 = gimple_build_assign (make_ssa_name (atype),
4460 vec_oprnd0);
4461 vect_finish_stmt_generation (vinfo, stmt_info,
4462 new_stmt, gsi);
4463 vargs.safe_push (gimple_assign_lhs (new_stmt));
4465 else
4466 vargs.safe_push (vec_oprnd0);
4467 else
4469 vec_oprnd0 = build_constructor (atype, ctor_elts);
4470 gassign *new_stmt
4471 = gimple_build_assign (make_ssa_name (atype),
4472 vec_oprnd0);
4473 vect_finish_stmt_generation (vinfo, stmt_info,
4474 new_stmt, gsi);
4475 vargs.safe_push (gimple_assign_lhs (new_stmt));
4479 break;
4480 case SIMD_CLONE_ARG_TYPE_MASK:
4481 atype = bestn->simdclone->args[i].vector_type;
4482 if (bestn->simdclone->mask_mode != VOIDmode)
4484 /* FORNOW: this is disabled above. */
4485 gcc_unreachable ();
4487 else
4489 tree elt_type = TREE_TYPE (atype);
4490 tree one = fold_convert (elt_type, integer_one_node);
4491 tree zero = fold_convert (elt_type, integer_zero_node);
4492 o = vector_unroll_factor (nunits,
4493 simd_clone_subparts (atype));
4494 for (m = j * o; m < (j + 1) * o; m++)
4496 if (simd_clone_subparts (atype)
4497 < simd_clone_subparts (arginfo[i].vectype))
4499 /* The mask type has fewer elements than simdlen. */
4501 /* FORNOW */
4502 gcc_unreachable ();
4504 else if (simd_clone_subparts (atype)
4505 == simd_clone_subparts (arginfo[i].vectype))
4507 /* The clone's mask type has the same number of
4508 elements as the loop's vector type for this argument. */
4509 if (m == 0)
4511 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4512 o * ncopies,
4513 op,
4514 &vec_oprnds[i]);
4515 vec_oprnds_i[i] = 0;
4517 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4518 vec_oprnd0
4519 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4520 build_vector_from_val (atype, one),
4521 build_vector_from_val (atype, zero));
4522 gassign *new_stmt
4523 = gimple_build_assign (make_ssa_name (atype),
4524 vec_oprnd0);
4525 vect_finish_stmt_generation (vinfo, stmt_info,
4526 new_stmt, gsi);
4527 vargs.safe_push (gimple_assign_lhs (new_stmt));
4529 else
4531 /* The mask type has more elements than simdlen. */
4533 /* FORNOW */
4534 gcc_unreachable ();
4538 break;
4539 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4540 vargs.safe_push (op);
4541 break;
4542 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4543 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4544 if (j == 0)
4546 gimple_seq stmts;
4547 arginfo[i].op
4548 = force_gimple_operand (unshare_expr (arginfo[i].op),
4549 &stmts, true, NULL_TREE);
4550 if (stmts != NULL)
4552 basic_block new_bb;
4553 edge pe = loop_preheader_edge (loop);
4554 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4555 gcc_assert (!new_bb);
4557 if (arginfo[i].simd_lane_linear)
4559 vargs.safe_push (arginfo[i].op);
4560 break;
4562 tree phi_res = copy_ssa_name (op);
4563 gphi *new_phi = create_phi_node (phi_res, loop->header);
4564 add_phi_arg (new_phi, arginfo[i].op,
4565 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4566 enum tree_code code
4567 = POINTER_TYPE_P (TREE_TYPE (op))
4568 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4569 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4570 ? sizetype : TREE_TYPE (op);
4571 poly_widest_int cst
4572 = wi::mul (bestn->simdclone->args[i].linear_step,
4573 ncopies * nunits);
4574 tree tcst = wide_int_to_tree (type, cst);
4575 tree phi_arg = copy_ssa_name (op);
4576 gassign *new_stmt
4577 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4578 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4579 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4580 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4581 UNKNOWN_LOCATION);
4582 arginfo[i].op = phi_res;
4583 vargs.safe_push (phi_res);
4585 else
4587 enum tree_code code
4588 = POINTER_TYPE_P (TREE_TYPE (op))
4589 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4590 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4591 ? sizetype : TREE_TYPE (op);
4592 poly_widest_int cst
4593 = wi::mul (bestn->simdclone->args[i].linear_step,
4594 j * nunits);
4595 tree tcst = wide_int_to_tree (type, cst);
4596 new_temp = make_ssa_name (TREE_TYPE (op));
4597 gassign *new_stmt
4598 = gimple_build_assign (new_temp, code,
4599 arginfo[i].op, tcst);
4600 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4601 vargs.safe_push (new_temp);
4603 break;
4604 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4605 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4606 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4607 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4610 default:
4611 gcc_unreachable ();
4615 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4616 if (vec_dest)
4618 gcc_assert (ratype
4619 || known_eq (simd_clone_subparts (rtype), nunits));
4620 if (ratype)
4621 new_temp = create_tmp_var (ratype);
4622 else if (useless_type_conversion_p (vectype, rtype))
4623 new_temp = make_ssa_name (vec_dest, new_call);
4624 else
4625 new_temp = make_ssa_name (rtype, new_call);
4626 gimple_call_set_lhs (new_call, new_temp);
4628 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4629 gimple *new_stmt = new_call;
4631 if (vec_dest)
4633 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4635 unsigned int k, l;
4636 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4637 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4638 k = vector_unroll_factor (nunits,
4639 simd_clone_subparts (vectype));
4640 gcc_assert ((k & (k - 1)) == 0);
4641 for (l = 0; l < k; l++)
4643 tree t;
4644 if (ratype)
4646 t = build_fold_addr_expr (new_temp);
4647 t = build2 (MEM_REF, vectype, t,
4648 build_int_cst (TREE_TYPE (t), l * bytes));
4650 else
4651 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4652 bitsize_int (prec), bitsize_int (l * prec));
4653 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4654 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4656 if (j == 0 && l == 0)
4657 *vec_stmt = new_stmt;
4658 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4661 if (ratype)
4662 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4663 continue;
4665 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4667 unsigned int k = (simd_clone_subparts (vectype)
4668 / simd_clone_subparts (rtype));
4669 gcc_assert ((k & (k - 1)) == 0);
4670 if ((j & (k - 1)) == 0)
4671 vec_alloc (ret_ctor_elts, k);
4672 if (ratype)
4674 unsigned int m, o;
4675 o = vector_unroll_factor (nunits,
4676 simd_clone_subparts (rtype));
4677 for (m = 0; m < o; m++)
4679 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4680 size_int (m), NULL_TREE, NULL_TREE);
4681 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4682 tem);
4683 vect_finish_stmt_generation (vinfo, stmt_info,
4684 new_stmt, gsi);
4685 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4686 gimple_assign_lhs (new_stmt));
4688 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4690 else
4691 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4692 if ((j & (k - 1)) != k - 1)
4693 continue;
4694 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4695 new_stmt
4696 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4697 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4699 if ((unsigned) j == k - 1)
4700 *vec_stmt = new_stmt;
4701 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4702 continue;
4704 else if (ratype)
4706 tree t = build_fold_addr_expr (new_temp);
4707 t = build2 (MEM_REF, vectype, t,
4708 build_int_cst (TREE_TYPE (t), 0));
4709 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4710 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4711 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4713 else if (!useless_type_conversion_p (vectype, rtype))
4715 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4716 new_stmt
4717 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4718 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4722 if (j == 0)
4723 *vec_stmt = new_stmt;
4724 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4727 for (i = 0; i < nargs; ++i)
4729 vec<tree> oprndsi = vec_oprnds[i];
4730 oprndsi.release ();
4732 vargs.release ();
4734 /* Mark the clone as no longer being a candidate for GC. */
4735 bestn->gc_candidate = false;
4737 /* The call in STMT might prevent it from being removed in dce.
4738 We cannot remove it here, however, because of the way the ssa name
4739 it defines is mapped to the new definition. So just replace the
4740 rhs of the statement with something harmless. */
4742 if (slp_node)
4743 return true;
4745 gimple *new_stmt;
4746 if (scalar_dest)
4748 type = TREE_TYPE (scalar_dest);
4749 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4750 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4752 else
4753 new_stmt = gimple_build_nop ();
4754 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4755 unlink_stmt_vdef (stmt);
4757 return true;
4761 /* Function vect_gen_widened_results_half
4763 Create a vector stmt whose code, number of arguments, and result
4764 variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
4765 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4766 CH may be a tree code or an internal function code; vect_gimple_build
4767 creates the corresponding assignment or call.
4768 STMT_INFO is the original scalar stmt that we are vectorizing. */
4770 static gimple *
4771 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4772 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4773 tree vec_dest, gimple_stmt_iterator *gsi,
4774 stmt_vec_info stmt_info)
4776 gimple *new_stmt;
4777 tree new_temp;
4779 /* Generate half of the widened result: */
4780 if (op_type != binary_op)
4781 vec_oprnd1 = NULL;
4782 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4783 new_temp = make_ssa_name (vec_dest, new_stmt);
4784 gimple_set_lhs (new_stmt, new_temp);
4785 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4787 return new_stmt;
4791 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4792 For multi-step conversions store the resulting vectors and call the function
4793 recursively. When NARROW_SRC_P is true there is still a conversion after
4794 the narrowing, so don't store the vectors in the SLP_NODE or in the vector
4795 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
4797 static void
4798 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4799 int multi_step_cvt,
4800 stmt_vec_info stmt_info,
4801 vec<tree> &vec_dsts,
4802 gimple_stmt_iterator *gsi,
4803 slp_tree slp_node, code_helper code,
4804 bool narrow_src_p)
4806 unsigned int i;
4807 tree vop0, vop1, new_tmp, vec_dest;
4809 vec_dest = vec_dsts.pop ();
4811 for (i = 0; i < vec_oprnds->length (); i += 2)
4813 /* Create demotion operation. */
4814 vop0 = (*vec_oprnds)[i];
4815 vop1 = (*vec_oprnds)[i + 1];
4816 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4817 new_tmp = make_ssa_name (vec_dest, new_stmt);
4818 gimple_set_lhs (new_stmt, new_tmp);
4819 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4820 if (multi_step_cvt || narrow_src_p)
4821 /* Store the resulting vector for the next recursive call,
4822 or return the resulting vector_tmp for a NARROW FLOAT_EXPR. */
4823 (*vec_oprnds)[i/2] = new_tmp;
4824 else
4826 /* This is the last step of the conversion sequence. Store the
4827 vectors in SLP_NODE or in vector info of the scalar statement
4828 (or in STMT_VINFO_RELATED_STMT chain). */
4829 if (slp_node)
4830 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4831 else
4832 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4836 /* For multi-step demotion operations we first generate demotion operations
4837 from the source type to the intermediate types, and then combine the
4838 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4839 type. */
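/* For example, with 128-bit vectors a DImode-to-QImode demotion would be
   built as V2DI->V4SI, then V4SI->V8HI, then V8HI->V16QI packs, the number
   of vectors halving at each recursive step.  */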
4840 if (multi_step_cvt)
4842 /* At each level of recursion we have half of the operands we had at the
4843 previous level. */
4844 vec_oprnds->truncate ((i+1)/2);
4845 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4846 multi_step_cvt - 1,
4847 stmt_info, vec_dsts, gsi,
4848 slp_node, VEC_PACK_TRUNC_EXPR,
4849 narrow_src_p);
4852 vec_dsts.quick_push (vec_dest);
4856 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4857 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4858 STMT_INFO. For multi-step conversions store the resulting vectors and
4859 call the function recursively. */
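/* E.g. when widening V8HI operands to V4SI results, each input vector
   yields two output vectors via a lo/hi pair of operations (such as
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR), which is why the
   result vector below is created with twice the length of VEC_OPRNDS0.  */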
4861 static void
4862 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4863 vec<tree> *vec_oprnds0,
4864 vec<tree> *vec_oprnds1,
4865 stmt_vec_info stmt_info, tree vec_dest,
4866 gimple_stmt_iterator *gsi,
4867 code_helper ch1,
4868 code_helper ch2, int op_type)
4870 int i;
4871 tree vop0, vop1, new_tmp1, new_tmp2;
4872 gimple *new_stmt1, *new_stmt2;
4873 vec<tree> vec_tmp = vNULL;
4875 vec_tmp.create (vec_oprnds0->length () * 2);
4876 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4878 if (op_type == binary_op)
4879 vop1 = (*vec_oprnds1)[i];
4880 else
4881 vop1 = NULL_TREE;
4883 /* Generate the two halves of the promotion operation. */
4884 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
4885 op_type, vec_dest, gsi,
4886 stmt_info);
4887 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
4888 op_type, vec_dest, gsi,
4889 stmt_info);
4890 if (is_gimple_call (new_stmt1))
4892 new_tmp1 = gimple_call_lhs (new_stmt1);
4893 new_tmp2 = gimple_call_lhs (new_stmt2);
4895 else
4897 new_tmp1 = gimple_assign_lhs (new_stmt1);
4898 new_tmp2 = gimple_assign_lhs (new_stmt2);
4901 /* Store the results for the next step. */
4902 vec_tmp.quick_push (new_tmp1);
4903 vec_tmp.quick_push (new_tmp2);
4906 vec_oprnds0->release ();
4907 *vec_oprnds0 = vec_tmp;
4910 /* Create vectorized promotion stmts for widening stmts using only half the
4911 potential vector size for input. */
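/* E.g. a WIDEN_MULT_EXPR of HImode inputs producing SImode results can
   use V4HI inputs occupying only half of a 128-bit register: each V4HI
   operand is first extended to V4SI with a NOP_EXPR and the multiplication
   is then done directly on V4SI, so input and output have the same number
   of lanes.  */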
4912 static void
4913 vect_create_half_widening_stmts (vec_info *vinfo,
4914 vec<tree> *vec_oprnds0,
4915 vec<tree> *vec_oprnds1,
4916 stmt_vec_info stmt_info, tree vec_dest,
4917 gimple_stmt_iterator *gsi,
4918 code_helper code1,
4919 int op_type)
4921 int i;
4922 tree vop0, vop1;
4923 gimple *new_stmt1;
4924 gimple *new_stmt2;
4925 gimple *new_stmt3;
4926 vec<tree> vec_tmp = vNULL;
4928 vec_tmp.create (vec_oprnds0->length ());
4929 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4931 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4933 gcc_assert (op_type == binary_op);
4934 vop1 = (*vec_oprnds1)[i];
4936 /* Widen the first vector input. */
4937 out_type = TREE_TYPE (vec_dest);
4938 new_tmp1 = make_ssa_name (out_type);
4939 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4941 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4943 /* Widen the second vector input. */
4944 new_tmp2 = make_ssa_name (out_type);
4945 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4946 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4947 /* Perform the operation with both vector inputs widened. */
4948 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
4950 else
4952 /* Perform the operation with the single vector input widened. */
4953 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
4956 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4957 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4958 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4960 /* Store the results for the next step. */
4961 vec_tmp.quick_push (new_tmp3);
4964 vec_oprnds0->release ();
4965 *vec_oprnds0 = vec_tmp;
4969 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4970 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4971 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4972 Return true if STMT_INFO is vectorizable in this way. */
4974 static bool
4975 vectorizable_conversion (vec_info *vinfo,
4976 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4977 gimple **vec_stmt, slp_tree slp_node,
4978 stmt_vector_for_cost *cost_vec)
4980 tree vec_dest, cvt_op = NULL_TREE;
4981 tree scalar_dest;
4982 tree op0, op1 = NULL_TREE;
4983 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4984 tree_code tc1, tc2;
4985 code_helper code, code1, code2;
4986 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4987 tree new_temp;
4988 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4989 int ndts = 2;
4990 poly_uint64 nunits_in;
4991 poly_uint64 nunits_out;
4992 tree vectype_out, vectype_in;
4993 int ncopies, i;
4994 tree lhs_type, rhs_type;
4995 /* For conversions between floating point and integer, there are two NARROW
4996 cases. NARROW_SRC is for FLOAT_EXPR, meaning
4997 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
4998 This is safe when the range of the source integer fits into the lower
4999 precision. NARROW_DST is for FIX_TRUNC_EXPR, meaning
5000 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5001 For other conversions that narrow, NARROW_DST is used by
5002 default. */
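/* For instance, converting 64-bit integers held in V2DI vectors to floats
   in V4SF vectors can be done NARROW_SRC style: demote DI to SI first
   (safe only when range information shows the values fit) and then apply
   FLOAT_EXPR; converting doubles to 32-bit integers goes the NARROW_DST
   way: FIX_TRUNC_EXPR first, then pack the integer results.  */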
5003 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5004 vec<tree> vec_oprnds0 = vNULL;
5005 vec<tree> vec_oprnds1 = vNULL;
5006 tree vop0;
5007 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5008 int multi_step_cvt = 0;
5009 vec<tree> interm_types = vNULL;
5010 tree intermediate_type, cvt_type = NULL_TREE;
5011 int op_type;
5012 unsigned short fltsz;
5014 /* Is STMT a vectorizable conversion? */
5016 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5017 return false;
5019 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5020 && ! vec_stmt)
5021 return false;
5023 gimple* stmt = stmt_info->stmt;
5024 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5025 return false;
5027 if (gimple_get_lhs (stmt) == NULL_TREE
5028 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5029 return false;
5031 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5032 return false;
5034 if (is_gimple_assign (stmt))
5036 code = gimple_assign_rhs_code (stmt);
5037 op_type = TREE_CODE_LENGTH ((tree_code) code);
5039 else if (gimple_call_internal_p (stmt))
5041 code = gimple_call_internal_fn (stmt);
5042 op_type = gimple_call_num_args (stmt);
5044 else
5045 return false;
5047 bool widen_arith = (code == WIDEN_MULT_EXPR
5048 || code == WIDEN_LSHIFT_EXPR
5049 || widening_fn_p (code));
5051 if (!widen_arith
5052 && !CONVERT_EXPR_CODE_P (code)
5053 && code != FIX_TRUNC_EXPR
5054 && code != FLOAT_EXPR)
5055 return false;
5057 /* Check types of lhs and rhs. */
5058 scalar_dest = gimple_get_lhs (stmt);
5059 lhs_type = TREE_TYPE (scalar_dest);
5060 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5062 /* Check the operands of the operation. */
5063 slp_tree slp_op0, slp_op1 = NULL;
5064 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5065 0, &op0, &slp_op0, &dt[0], &vectype_in))
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "use not simple.\n");
5070 return false;
5073 rhs_type = TREE_TYPE (op0);
5074 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5075 && !((INTEGRAL_TYPE_P (lhs_type)
5076 && INTEGRAL_TYPE_P (rhs_type))
5077 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5078 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5079 return false;
5081 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5082 && ((INTEGRAL_TYPE_P (lhs_type)
5083 && !type_has_mode_precision_p (lhs_type))
5084 || (INTEGRAL_TYPE_P (rhs_type)
5085 && !type_has_mode_precision_p (rhs_type))))
5087 if (dump_enabled_p ())
5088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5089 "type conversion to/from bit-precision unsupported."
5090 "\n");
5091 return false;
5094 if (op_type == binary_op)
5096 gcc_assert (code == WIDEN_MULT_EXPR
5097 || code == WIDEN_LSHIFT_EXPR
5098 || widening_fn_p (code));
5100 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5101 gimple_call_arg (stmt, 0);
5102 tree vectype1_in;
5103 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5104 &op1, &slp_op1, &dt[1], &vectype1_in))
5106 if (dump_enabled_p ())
5107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5108 "use not simple.\n");
5109 return false;
5111 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5112 OP1. */
5113 if (!vectype_in)
5114 vectype_in = vectype1_in;
5117 /* If op0 is an external or constant def, infer the vector type
5118 from the scalar type. */
5119 if (!vectype_in)
5120 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5121 if (vec_stmt)
5122 gcc_assert (vectype_in);
5123 if (!vectype_in)
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5127 "no vectype for scalar type %T\n", rhs_type);
5129 return false;
5132 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5133 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5137 "can't convert between boolean and non "
5138 "boolean vectors %T\n", rhs_type);
5140 return false;
5143 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5144 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5145 if (known_eq (nunits_out, nunits_in))
5146 if (widen_arith)
5147 modifier = WIDEN;
5148 else
5149 modifier = NONE;
5150 else if (multiple_p (nunits_out, nunits_in))
5151 modifier = NARROW_DST;
5152 else
5154 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5155 modifier = WIDEN;
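/* E.g. a WIDEN_MULT_EXPR taking V8HI inputs to a V8SI output (equal lane
   counts) lands in the half-widening WIDEN case, a V2DI->V4SI conversion
   is NARROW_DST, and a V4SI->V2DI conversion is the ordinary WIDEN case.  */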
5158 /* Multiple types in SLP are handled by creating the appropriate number of
5159 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5160 case of SLP. */
5161 if (slp_node)
5162 ncopies = 1;
5163 else if (modifier == NARROW_DST)
5164 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5165 else
5166 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5168 /* Sanity check: make sure that at least one copy of the vectorized stmt
5169 needs to be generated. */
5170 gcc_assert (ncopies >= 1);
5172 bool found_mode = false;
5173 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5174 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5175 opt_scalar_mode rhs_mode_iter;
5177 /* Supportable by target? */
5178 switch (modifier)
5180 case NONE:
5181 if (code != FIX_TRUNC_EXPR
5182 && code != FLOAT_EXPR
5183 && !CONVERT_EXPR_CODE_P (code))
5184 return false;
5185 gcc_assert (code.is_tree_code ());
5186 if (supportable_convert_operation ((tree_code) code, vectype_out,
5187 vectype_in, &tc1))
5189 code1 = tc1;
5190 break;
5193 /* For conversions between float and smaller integer types try whether we
5194 can use intermediate signed integer types to support the
5195 conversion. */
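/* E.g. a char->double FLOAT_EXPR may be handled as a QI->SI NOP conversion
   followed by an SI->double FLOAT_EXPR, provided both steps are supported
   with matching lane counts (in practice this is a basic-block SLP case
   with differently sized vectors).  */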
5196 if ((code == FLOAT_EXPR
5197 && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
5198 || (code == FIX_TRUNC_EXPR
5199 && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)
5200 && !flag_trapping_math))
5202 bool float_expr_p = code == FLOAT_EXPR;
5203 scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
5204 fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
5205 code1 = float_expr_p ? code : NOP_EXPR;
5206 codecvt1 = float_expr_p ? NOP_EXPR : code;
5207 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
5209 imode = rhs_mode_iter.require ();
5210 if (GET_MODE_SIZE (imode) > fltsz)
5211 break;
5213 cvt_type
5214 = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode),
5215 0);
5216 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
5217 slp_node);
5218 /* This should only happen for SLP, as long as the loop vectorizer
5219 only supports same-sized vectors. */
5220 if (cvt_type == NULL_TREE
5221 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5222 || !supportable_convert_operation ((tree_code) code1,
5223 vectype_out,
5224 cvt_type, &tc1)
5225 || !supportable_convert_operation ((tree_code) codecvt1,
5226 cvt_type,
5227 vectype_in, &tc2))
5228 continue;
5230 found_mode = true;
5231 break;
5234 if (found_mode)
5236 multi_step_cvt++;
5237 interm_types.safe_push (cvt_type);
5238 cvt_type = NULL_TREE;
5239 code1 = tc1;
5240 codecvt1 = tc2;
5241 break;
5244 /* FALLTHRU */
5245 unsupported:
5246 if (dump_enabled_p ())
5247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5248 "conversion not supported by target.\n");
5249 return false;
5251 case WIDEN:
5252 if (known_eq (nunits_in, nunits_out))
5254 if (!(code.is_tree_code ()
5255 && supportable_half_widening_operation ((tree_code) code,
5256 vectype_out, vectype_in,
5257 &tc1)))
5258 goto unsupported;
5259 code1 = tc1;
5260 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5261 break;
5263 if (supportable_widening_operation (vinfo, code, stmt_info,
5264 vectype_out, vectype_in, &code1,
5265 &code2, &multi_step_cvt,
5266 &interm_types))
5268 /* Binary widening operation can only be supported directly by the
5269 architecture. */
5270 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5271 break;
5274 if (code != FLOAT_EXPR
5275 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5276 goto unsupported;
5278 fltsz = GET_MODE_SIZE (lhs_mode);
5279 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5281 rhs_mode = rhs_mode_iter.require ();
5282 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5283 break;
5285 cvt_type
5286 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5287 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5288 if (cvt_type == NULL_TREE)
5289 goto unsupported;
5291 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5293 tc1 = ERROR_MARK;
5294 gcc_assert (code.is_tree_code ());
5295 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5296 cvt_type, &tc1))
5297 goto unsupported;
5298 codecvt1 = tc1;
5300 else if (!supportable_widening_operation (vinfo, code,
5301 stmt_info, vectype_out,
5302 cvt_type, &codecvt1,
5303 &codecvt2, &multi_step_cvt,
5304 &interm_types))
5305 continue;
5306 else
5307 gcc_assert (multi_step_cvt == 0);
5309 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5310 cvt_type,
5311 vectype_in, &code1,
5312 &code2, &multi_step_cvt,
5313 &interm_types))
5315 found_mode = true;
5316 break;
5320 if (!found_mode)
5321 goto unsupported;
5323 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5324 codecvt2 = ERROR_MARK;
5325 else
5327 multi_step_cvt++;
5328 interm_types.safe_push (cvt_type);
5329 cvt_type = NULL_TREE;
5331 break;
5333 case NARROW_DST:
5334 gcc_assert (op_type == unary_op);
5335 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5336 &code1, &multi_step_cvt,
5337 &interm_types))
5338 break;
5340 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5341 goto unsupported;
5343 if (code == FIX_TRUNC_EXPR)
5345 cvt_type
5346 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5347 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5348 if (cvt_type == NULL_TREE)
5349 goto unsupported;
5350 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5351 &tc1))
5352 codecvt1 = tc1;
5353 else
5354 goto unsupported;
5355 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5356 &code1, &multi_step_cvt,
5357 &interm_types))
5358 break;
5360 /* If op0 can be represented with a low-precision integer,
5361 truncate it to cvt_type and then do FLOAT_EXPR. */
5362 else if (code == FLOAT_EXPR)
5364 wide_int op_min_value, op_max_value;
5365 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5366 goto unsupported;
5368 cvt_type
5369 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5370 if (cvt_type == NULL_TREE
5371 || (wi::min_precision (op_max_value, SIGNED)
5372 > TYPE_PRECISION (cvt_type))
5373 || (wi::min_precision (op_min_value, SIGNED)
5374 > TYPE_PRECISION (cvt_type)))
5375 goto unsupported;
5377 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5378 if (cvt_type == NULL_TREE)
5379 goto unsupported;
5380 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5381 &code1, &multi_step_cvt,
5382 &interm_types))
5383 goto unsupported;
5384 if (supportable_convert_operation ((tree_code) code, vectype_out,
5385 cvt_type, &tc1))
5387 codecvt1 = tc1;
5388 modifier = NARROW_SRC;
5389 break;
5393 goto unsupported;
5395 default:
5396 gcc_unreachable ();
5399 if (!vec_stmt) /* transformation not required. */
5401 if (slp_node
5402 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5403 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5407 "incompatible vector types for invariants\n");
5408 return false;
5410 DUMP_VECT_SCOPE ("vectorizable_conversion");
5411 if (modifier == NONE)
5413 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5414 vect_model_simple_cost (vinfo, stmt_info,
5415 ncopies * (1 + multi_step_cvt),
5416 dt, ndts, slp_node, cost_vec);
5418 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5420 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5421 /* The final packing step produces one vector result per copy. */
5422 unsigned int nvectors
5423 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5424 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5425 multi_step_cvt, cost_vec,
5426 widen_arith);
5428 else
5430 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5431 /* The initial unpacking step produces two vector results
5432 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5433 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5434 unsigned int nvectors
5435 = (slp_node
5436 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5437 : ncopies * 2);
5438 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5439 multi_step_cvt, cost_vec,
5440 widen_arith);
5442 interm_types.release ();
5443 return true;
5446 /* Transform. */
5447 if (dump_enabled_p ())
5448 dump_printf_loc (MSG_NOTE, vect_location,
5449 "transform conversion. ncopies = %d.\n", ncopies);
5451 if (op_type == binary_op)
5453 if (CONSTANT_CLASS_P (op0))
5454 op0 = fold_convert (TREE_TYPE (op1), op0);
5455 else if (CONSTANT_CLASS_P (op1))
5456 op1 = fold_convert (TREE_TYPE (op0), op1);
5459 /* In case of multi-step conversion, we first generate conversion operations
5460 to the intermediate types, and then from those types to the final one.
5461 We create vector destinations for the intermediate types (TYPES) received
5462 from supportable_*_operation, and store them in the correct order
5463 for future use in vect_create_vectorized_*_stmts (). */
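/* As an illustration, a two-step narrowing from a vector of ints to a
   vector of chars goes through a vector of shorts; besides the final
   destination we also create a destination variable of the intermediate
   short vector type to hold the partial results.  */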
5464 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5465 bool widen_or_narrow_float_p
5466 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5467 vec_dest = vect_create_destination_var (scalar_dest,
5468 widen_or_narrow_float_p
5469 ? cvt_type : vectype_out);
5470 vec_dsts.quick_push (vec_dest);
5472 if (multi_step_cvt)
5474 for (i = interm_types.length () - 1;
5475 interm_types.iterate (i, &intermediate_type); i--)
5477 vec_dest = vect_create_destination_var (scalar_dest,
5478 intermediate_type);
5479 vec_dsts.quick_push (vec_dest);
5483 if (cvt_type)
5484 vec_dest = vect_create_destination_var (scalar_dest,
5485 widen_or_narrow_float_p
5486 ? vectype_out : cvt_type);
5488 int ninputs = 1;
5489 if (!slp_node)
5491 if (modifier == WIDEN)
5493 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5495 if (multi_step_cvt)
5496 ninputs = vect_pow2 (multi_step_cvt);
5497 ninputs *= 2;
5501 switch (modifier)
5503 case NONE:
5504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5505 op0, &vec_oprnds0);
5506 /* vec_dest is the intermediate-type operand when multi_step_cvt is set. */
5507 if (multi_step_cvt)
5509 cvt_op = vec_dest;
5510 vec_dest = vec_dsts[0];
5513 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5515 /* Arguments are ready, create the new vector stmt. */
5516 gimple* new_stmt;
5517 if (multi_step_cvt)
5519 gcc_assert (multi_step_cvt == 1);
5520 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5521 new_temp = make_ssa_name (cvt_op, new_stmt);
5522 gimple_assign_set_lhs (new_stmt, new_temp);
5523 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5524 vop0 = new_temp;
5526 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5527 new_temp = make_ssa_name (vec_dest, new_stmt);
5528 gimple_set_lhs (new_stmt, new_temp);
5529 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5531 if (slp_node)
5532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5533 else
5534 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5536 break;
5538 case WIDEN:
5539 /* In case the vectorization factor (VF) is bigger than the number
5540 of elements that we can fit in a vectype (nunits), we have to
5541 generate more than one vector stmt, i.e., we need to "unroll"
5542 the vector stmt by a factor VF/nunits. */
5543 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5544 op0, &vec_oprnds0,
5545 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5546 &vec_oprnds1);
5547 if (code == WIDEN_LSHIFT_EXPR)
5549 int oprnds_size = vec_oprnds0.length ();
5550 vec_oprnds1.create (oprnds_size);
5551 for (i = 0; i < oprnds_size; ++i)
5552 vec_oprnds1.quick_push (op1);
5554 /* Arguments are ready. Create the new vector stmts. */
5555 for (i = multi_step_cvt; i >= 0; i--)
5557 tree this_dest = vec_dsts[i];
5558 code_helper c1 = code1, c2 = code2;
5559 if (i == 0 && codecvt2 != ERROR_MARK)
5561 c1 = codecvt1;
5562 c2 = codecvt2;
5564 if (known_eq (nunits_out, nunits_in))
5565 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5566 stmt_info, this_dest, gsi, c1,
5567 op_type);
5568 else
5569 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5570 &vec_oprnds1, stmt_info,
5571 this_dest, gsi,
5572 c1, c2, op_type);
5575 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5577 gimple *new_stmt;
5578 if (cvt_type)
5580 new_temp = make_ssa_name (vec_dest);
5581 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5582 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5584 else
5585 new_stmt = SSA_NAME_DEF_STMT (vop0);
5587 if (slp_node)
5588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5589 else
5590 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5592 break;
5594 case NARROW_SRC:
5595 case NARROW_DST:
5596 /* In case the vectorization factor (VF) is bigger than the number
5597 of elements that we can fit in a vectype (nunits), we have to
5598 generate more than one vector stmt, i.e., we need to "unroll"
5599 the vector stmt by a factor VF/nunits. */
5600 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5601 op0, &vec_oprnds0);
5602 /* Arguments are ready. Create the new vector stmts. */
5603 if (cvt_type && modifier == NARROW_DST)
5604 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5606 new_temp = make_ssa_name (vec_dest);
5607 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5609 vec_oprnds0[i] = new_temp;
5612 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5613 multi_step_cvt,
5614 stmt_info, vec_dsts, gsi,
5615 slp_node, code1,
5616 modifier == NARROW_SRC);
5617 /* After demoting op0 to cvt_type, convert it to dest. */
5618 if (cvt_type && code == FLOAT_EXPR)
5620 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5622 /* Arguments are ready, create the new vector stmt. */
5623 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5624 gimple *new_stmt
5625 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5626 new_temp = make_ssa_name (vec_dest, new_stmt);
5627 gimple_set_lhs (new_stmt, new_temp);
5628 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5630 /* This is the last step of the conversion sequence. Store the
5631 vectors in SLP_NODE or in vector info of the scalar statement
5632 (or in STMT_VINFO_RELATED_STMT chain). */
5633 if (slp_node)
5634 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5635 else
5636 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5639 break;
5641 if (!slp_node)
5642 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5644 vec_oprnds0.release ();
5645 vec_oprnds1.release ();
5646 interm_types.release ();
5648 return true;
5651 /* Return true if we can assume from the scalar form of STMT_INFO that
5652 neither the scalar nor the vector forms will generate code. STMT_INFO
5653 is known not to involve a data reference. */
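/* As an illustration, a sign-change such as the scalar
     unsigned int u = (unsigned int) i;
   where I is an int generates no code in either scalar or vector form,
   whereas a widening conversion does.  */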
5655 bool
5656 vect_nop_conversion_p (stmt_vec_info stmt_info)
5658 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5659 if (!stmt)
5660 return false;
5662 tree lhs = gimple_assign_lhs (stmt);
5663 tree_code code = gimple_assign_rhs_code (stmt);
5664 tree rhs = gimple_assign_rhs1 (stmt);
5666 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5667 return true;
5669 if (CONVERT_EXPR_CODE_P (code))
5670 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5672 return false;
5675 /* Function vectorizable_assignment.
5677 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5678 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5679 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5680 Return true if STMT_INFO is vectorizable in this way. */
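/* As an illustration, plain copies like a = b and mode-preserving
   conversions such as a = (unsigned int) b are handled here; the latter
   are emitted as VIEW_CONVERT_EXPRs on the vector operands.  */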
5682 static bool
5683 vectorizable_assignment (vec_info *vinfo,
5684 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5685 gimple **vec_stmt, slp_tree slp_node,
5686 stmt_vector_for_cost *cost_vec)
5688 tree vec_dest;
5689 tree scalar_dest;
5690 tree op;
5691 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5692 tree new_temp;
5693 enum vect_def_type dt[1] = {vect_unknown_def_type};
5694 int ndts = 1;
5695 int ncopies;
5696 int i;
5697 vec<tree> vec_oprnds = vNULL;
5698 tree vop;
5699 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5700 enum tree_code code;
5701 tree vectype_in;
5703 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5704 return false;
5706 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5707 && ! vec_stmt)
5708 return false;
5710 /* Is vectorizable assignment? */
5711 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5712 if (!stmt)
5713 return false;
5715 scalar_dest = gimple_assign_lhs (stmt);
5716 if (TREE_CODE (scalar_dest) != SSA_NAME)
5717 return false;
5719 if (STMT_VINFO_DATA_REF (stmt_info))
5720 return false;
5722 code = gimple_assign_rhs_code (stmt);
5723 if (!(gimple_assign_single_p (stmt)
5724 || code == PAREN_EXPR
5725 || CONVERT_EXPR_CODE_P (code)))
5726 return false;
5728 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5729 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5731 /* Multiple types in SLP are handled by creating the appropriate number of
5732 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5733 case of SLP. */
5734 if (slp_node)
5735 ncopies = 1;
5736 else
5737 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5739 gcc_assert (ncopies >= 1);
5741 slp_tree slp_op;
5742 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5743 &dt[0], &vectype_in))
5745 if (dump_enabled_p ())
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5747 "use not simple.\n");
5748 return false;
5750 if (!vectype_in)
5751 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5753 /* We can handle NOP_EXPR conversions that do not change the number
5754 of elements or the vector size. */
5755 if ((CONVERT_EXPR_CODE_P (code)
5756 || code == VIEW_CONVERT_EXPR)
5757 && (!vectype_in
5758 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5759 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5760 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5761 return false;
5763 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5767 "can't convert between boolean and non "
5768 "boolean vectors %T\n", TREE_TYPE (op));
5770 return false;
5773 /* We do not handle bit-precision changes. */
5774 if ((CONVERT_EXPR_CODE_P (code)
5775 || code == VIEW_CONVERT_EXPR)
5776 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5777 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5778 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5779 && !type_has_mode_precision_p (TREE_TYPE (op))))
5780 /* But a conversion that does not change the bit-pattern is ok. */
5781 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5782 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5783 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5784 > TYPE_PRECISION (TREE_TYPE (op)))
5785 && TYPE_UNSIGNED (TREE_TYPE (op))))
5787 if (dump_enabled_p ())
5788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5789 "type conversion to/from bit-precision "
5790 "unsupported.\n");
5791 return false;
5794 if (!vec_stmt) /* transformation not required. */
5796 if (slp_node
5797 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5799 if (dump_enabled_p ())
5800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5801 "incompatible vector types for invariants\n");
5802 return false;
5804 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5805 DUMP_VECT_SCOPE ("vectorizable_assignment");
5806 if (!vect_nop_conversion_p (stmt_info))
5807 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5808 cost_vec);
5809 return true;
5812 /* Transform. */
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5816 /* Handle def. */
5817 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5819 /* Handle use. */
5820 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5822 /* Arguments are ready. Create the new vector stmt. */
5823 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5825 if (CONVERT_EXPR_CODE_P (code)
5826 || code == VIEW_CONVERT_EXPR)
5827 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5828 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5829 new_temp = make_ssa_name (vec_dest, new_stmt);
5830 gimple_assign_set_lhs (new_stmt, new_temp);
5831 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5832 if (slp_node)
5833 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5834 else
5835 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5837 if (!slp_node)
5838 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5840 vec_oprnds.release ();
5841 return true;
5845 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5846 either as shift by a scalar or by a vector. */
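/* As an illustration, a target may provide only a vector-by-scalar shift
   pattern, only a vector-by-vector one, or both; this predicate answers
   whether either form exists for SCALAR_TYPE.  */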
5848 bool
5849 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5852 machine_mode vec_mode;
5853 optab optab;
5854 int icode;
5855 tree vectype;
5857 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5858 if (!vectype)
5859 return false;
5861 optab = optab_for_tree_code (code, vectype, optab_scalar);
5862 if (!optab
5863 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5865 optab = optab_for_tree_code (code, vectype, optab_vector);
5866 if (!optab
5867 || (optab_handler (optab, TYPE_MODE (vectype))
5868 == CODE_FOR_nothing))
5869 return false;
5872 vec_mode = TYPE_MODE (vectype);
5873 icode = (int) optab_handler (optab, vec_mode);
5874 if (icode == CODE_FOR_nothing)
5875 return false;
5877 return true;
5881 /* Function vectorizable_shift.
5883 Check if STMT_INFO performs a shift operation that can be vectorized.
5884 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5885 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5886 Return true if STMT_INFO is vectorizable in this way. */
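/* As an illustration, in
     for (i = 0; i < n; i++)
       a[i] = b[i] << c;
   the shift amount is loop invariant and can stay scalar, whereas in
     a[i] = b[i] << c[i];
   it varies per element and a vector-by-vector shift is required.  */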
5888 static bool
5889 vectorizable_shift (vec_info *vinfo,
5890 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5891 gimple **vec_stmt, slp_tree slp_node,
5892 stmt_vector_for_cost *cost_vec)
5894 tree vec_dest;
5895 tree scalar_dest;
5896 tree op0, op1 = NULL;
5897 tree vec_oprnd1 = NULL_TREE;
5898 tree vectype;
5899 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5900 enum tree_code code;
5901 machine_mode vec_mode;
5902 tree new_temp;
5903 optab optab;
5904 int icode;
5905 machine_mode optab_op2_mode;
5906 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5907 int ndts = 2;
5908 poly_uint64 nunits_in;
5909 poly_uint64 nunits_out;
5910 tree vectype_out;
5911 tree op1_vectype;
5912 int ncopies;
5913 int i;
5914 vec<tree> vec_oprnds0 = vNULL;
5915 vec<tree> vec_oprnds1 = vNULL;
5916 tree vop0, vop1;
5917 unsigned int k;
5918 bool scalar_shift_arg = true;
5919 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5920 bool incompatible_op1_vectype_p = false;
5922 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5923 return false;
5925 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5926 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5927 && ! vec_stmt)
5928 return false;
5930 /* Is STMT a vectorizable binary/unary operation? */
5931 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5932 if (!stmt)
5933 return false;
5935 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5936 return false;
5938 code = gimple_assign_rhs_code (stmt);
5940 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5941 || code == RROTATE_EXPR))
5942 return false;
5944 scalar_dest = gimple_assign_lhs (stmt);
5945 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5946 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5948 if (dump_enabled_p ())
5949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5950 "bit-precision shifts not supported.\n");
5951 return false;
5954 slp_tree slp_op0;
5955 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5956 0, &op0, &slp_op0, &dt[0], &vectype))
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5960 "use not simple.\n");
5961 return false;
5963 /* If op0 is an external or constant def, infer the vector type
5964 from the scalar type. */
5965 if (!vectype)
5966 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5967 if (vec_stmt)
5968 gcc_assert (vectype);
5969 if (!vectype)
5971 if (dump_enabled_p ())
5972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5973 "no vectype for scalar type\n");
5974 return false;
5977 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5978 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5979 if (maybe_ne (nunits_out, nunits_in))
5980 return false;
5982 stmt_vec_info op1_def_stmt_info;
5983 slp_tree slp_op1;
5984 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5985 &dt[1], &op1_vectype, &op1_def_stmt_info))
5987 if (dump_enabled_p ())
5988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5989 "use not simple.\n");
5990 return false;
5993 /* Multiple types in SLP are handled by creating the appropriate number of
5994 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5995 case of SLP. */
5996 if (slp_node)
5997 ncopies = 1;
5998 else
5999 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6001 gcc_assert (ncopies >= 1);
6003 /* Determine whether the shift amount is a vector, or scalar. If the
6004 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6006 if ((dt[1] == vect_internal_def
6007 || dt[1] == vect_induction_def
6008 || dt[1] == vect_nested_cycle)
6009 && !slp_node)
6010 scalar_shift_arg = false;
6011 else if (dt[1] == vect_constant_def
6012 || dt[1] == vect_external_def
6013 || dt[1] == vect_internal_def)
6015 /* In SLP, we need to check whether the shift count is the same
6016 for all statements; in loops, if it is a constant or invariant,
6017 it is always a scalar shift. */
6018 if (slp_node)
6020 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6021 stmt_vec_info slpstmt_info;
6023 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6025 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6026 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6027 scalar_shift_arg = false;
6030 /* For internal SLP defs we have to make sure we see scalar stmts
6031 for all vector elements.
6032 ??? For different vectors we could resort to a different
6033 scalar shift operand but code-generation below simply always
6034 takes the first. */
6035 if (dt[1] == vect_internal_def
6036 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6037 stmts.length ()))
6038 scalar_shift_arg = false;
6041 /* If the shift amount is computed by a pattern stmt we cannot
6042 use the scalar amount directly thus give up and use a vector
6043 shift. */
6044 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6045 scalar_shift_arg = false;
6047 else
6049 if (dump_enabled_p ())
6050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6051 "operand mode requires invariant argument.\n");
6052 return false;
6055 /* Vector shifted by vector. */
6056 bool was_scalar_shift_arg = scalar_shift_arg;
6057 if (!scalar_shift_arg)
6059 optab = optab_for_tree_code (code, vectype, optab_vector);
6060 if (dump_enabled_p ())
6061 dump_printf_loc (MSG_NOTE, vect_location,
6062 "vector/vector shift/rotate found.\n");
6064 if (!op1_vectype)
6065 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6066 slp_op1);
6067 incompatible_op1_vectype_p
6068 = (op1_vectype == NULL_TREE
6069 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6070 TYPE_VECTOR_SUBPARTS (vectype))
6071 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6072 if (incompatible_op1_vectype_p
6073 && (!slp_node
6074 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6075 || slp_op1->refcnt != 1))
6077 if (dump_enabled_p ())
6078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6079 "unusable type for last operand in"
6080 " vector/vector shift/rotate.\n");
6081 return false;
6084 /* See if the machine has a vector shifted by scalar insn and if not
6085 then see if it has a vector shifted by vector insn. */
6086 else
6088 optab = optab_for_tree_code (code, vectype, optab_scalar);
6089 if (optab
6090 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6092 if (dump_enabled_p ())
6093 dump_printf_loc (MSG_NOTE, vect_location,
6094 "vector/scalar shift/rotate found.\n");
6096 else
6098 optab = optab_for_tree_code (code, vectype, optab_vector);
6099 if (optab
6100 && (optab_handler (optab, TYPE_MODE (vectype))
6101 != CODE_FOR_nothing))
6103 scalar_shift_arg = false;
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_NOTE, vect_location,
6107 "vector/vector shift/rotate found.\n");
6109 if (!op1_vectype)
6110 op1_vectype = get_vectype_for_scalar_type (vinfo,
6111 TREE_TYPE (op1),
6112 slp_op1);
6114 /* Unlike the other binary operators, shifts/rotates have
6115 the rhs being int, instead of the same type as the lhs,
6116 so make sure the scalar is the right type if we are
6117 dealing with vectors of long long/long/short/char. */
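/* E.g. a vector of shorts shifted by an int amount needs the amount
   converted to short before an invariant vector shift operand can be
   built from it.  */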
6118 incompatible_op1_vectype_p
6119 = (!op1_vectype
6120 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6121 TREE_TYPE (op1)));
6122 if (incompatible_op1_vectype_p
6123 && dt[1] == vect_internal_def)
6125 if (dump_enabled_p ())
6126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6127 "unusable type for last operand in"
6128 " vector/vector shift/rotate.\n");
6129 return false;
6135 /* Supportable by target? */
6136 if (!optab)
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6140 "no optab.\n");
6141 return false;
6143 vec_mode = TYPE_MODE (vectype);
6144 icode = (int) optab_handler (optab, vec_mode);
6145 if (icode == CODE_FOR_nothing)
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6149 "op not supported by target.\n");
6150 return false;
6152 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6153 if (vect_emulated_vector_p (vectype))
6154 return false;
6156 if (!vec_stmt) /* transformation not required. */
6158 if (slp_node
6159 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6160 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6161 && (!incompatible_op1_vectype_p
6162 || dt[1] == vect_constant_def)
6163 && !vect_maybe_update_slp_op_vectype
6164 (slp_op1,
6165 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "incompatible vector types for invariants\n");
6170 return false;
6172 /* Now adjust the constant shift amount in place. */
6173 if (slp_node
6174 && incompatible_op1_vectype_p
6175 && dt[1] == vect_constant_def)
6177 for (unsigned i = 0;
6178 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6180 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6181 = fold_convert (TREE_TYPE (vectype),
6182 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6183 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6184 == INTEGER_CST));
6187 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6188 DUMP_VECT_SCOPE ("vectorizable_shift");
6189 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6190 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6191 return true;
6194 /* Transform. */
6196 if (dump_enabled_p ())
6197 dump_printf_loc (MSG_NOTE, vect_location,
6198 "transform binary/unary operation.\n");
6200 if (incompatible_op1_vectype_p && !slp_node)
6202 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6203 op1 = fold_convert (TREE_TYPE (vectype), op1);
6204 if (dt[1] != vect_constant_def)
6205 op1 = vect_init_vector (vinfo, stmt_info, op1,
6206 TREE_TYPE (vectype), NULL);
6209 /* Handle def. */
6210 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6212 if (scalar_shift_arg && dt[1] != vect_internal_def)
6214 /* Vector shl and shr insn patterns can be defined with scalar
6215 operand 2 (shift operand). In this case, use constant or loop
6216 invariant op1 directly, without extending it to vector mode
6217 first. */
6218 optab_op2_mode = insn_data[icode].operand[2].mode;
6219 if (!VECTOR_MODE_P (optab_op2_mode))
6221 if (dump_enabled_p ())
6222 dump_printf_loc (MSG_NOTE, vect_location,
6223 "operand 1 using scalar mode.\n");
6224 vec_oprnd1 = op1;
6225 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6226 vec_oprnds1.quick_push (vec_oprnd1);
6227 /* Store vec_oprnd1 for every vector stmt to be created.
6228 We check during the analysis that all the shift arguments
6229 are the same.
6230 TODO: Allow different constants for different vector
6231 stmts generated for an SLP instance. */
6232 for (k = 0;
6233 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6234 vec_oprnds1.quick_push (vec_oprnd1);
6237 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6239 if (was_scalar_shift_arg)
6241 /* If the argument was the same in all lanes create
6242 the correctly typed vector shift amount directly. */
6243 op1 = fold_convert (TREE_TYPE (vectype), op1);
6244 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6245 !loop_vinfo ? gsi : NULL);
6246 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6247 !loop_vinfo ? gsi : NULL);
6248 vec_oprnds1.create (slp_node->vec_stmts_size);
6249 for (k = 0; k < slp_node->vec_stmts_size; k++)
6250 vec_oprnds1.quick_push (vec_oprnd1);
6252 else if (dt[1] == vect_constant_def)
6253 /* The constant shift amount has been adjusted in place. */
6255 else
6256 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6259 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6260 (a special case for certain kinds of vector shifts); otherwise,
6261 operand 1 should be of a vector type (the usual case). */
6262 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6263 op0, &vec_oprnds0,
6264 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6266 /* Arguments are ready. Create the new vector stmt. */
6267 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6269 /* For internal defs where we need to use a scalar shift arg,
6270 extract the first lane. */
6271 if (scalar_shift_arg && dt[1] == vect_internal_def)
6273 vop1 = vec_oprnds1[0];
6274 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6275 gassign *new_stmt
6276 = gimple_build_assign (new_temp,
6277 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6278 vop1,
6279 TYPE_SIZE (TREE_TYPE (new_temp)),
6280 bitsize_zero_node));
6281 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6282 vop1 = new_temp;
6284 else
6285 vop1 = vec_oprnds1[i];
6286 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6287 new_temp = make_ssa_name (vec_dest, new_stmt);
6288 gimple_assign_set_lhs (new_stmt, new_temp);
6289 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6290 if (slp_node)
6291 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6292 else
6293 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6296 if (!slp_node)
6297 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6299 vec_oprnds0.release ();
6300 vec_oprnds1.release ();
6302 return true;
6305 /* Function vectorizable_operation.
6307 Check if STMT_INFO performs a binary, unary or ternary operation that can
6308 be vectorized.
6309 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6310 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6311 Return true if STMT_INFO is vectorizable in this way. */
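/* As an illustration, unary negation z[i] = -x[i] and binary arithmetic
   such as z[i] = x[i] + y[i] are handled here; shifts, comparisons and
   COND_EXPRs are instead handled by their dedicated vectorizable_*
   routines.  */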
6313 static bool
6314 vectorizable_operation (vec_info *vinfo,
6315 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6316 gimple **vec_stmt, slp_tree slp_node,
6317 stmt_vector_for_cost *cost_vec)
6319 tree vec_dest;
6320 tree scalar_dest;
6321 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6322 tree vectype;
6323 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6324 enum tree_code code, orig_code;
6325 machine_mode vec_mode;
6326 tree new_temp;
6327 int op_type;
6328 optab optab;
6329 bool target_support_p;
6330 enum vect_def_type dt[3]
6331 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6332 int ndts = 3;
6333 poly_uint64 nunits_in;
6334 poly_uint64 nunits_out;
6335 tree vectype_out;
6336 int ncopies, vec_num;
6337 int i;
6338 vec<tree> vec_oprnds0 = vNULL;
6339 vec<tree> vec_oprnds1 = vNULL;
6340 vec<tree> vec_oprnds2 = vNULL;
6341 tree vop0, vop1, vop2;
6342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6344 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6345 return false;
6347 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6348 && ! vec_stmt)
6349 return false;
6351 /* Is STMT a vectorizable binary/unary operation? */
6352 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6353 if (!stmt)
6354 return false;
6356 /* Loads and stores are handled in vectorizable_{load,store}. */
6357 if (STMT_VINFO_DATA_REF (stmt_info))
6358 return false;
6360 orig_code = code = gimple_assign_rhs_code (stmt);
6362 /* Shifts are handled in vectorizable_shift. */
6363 if (code == LSHIFT_EXPR
6364 || code == RSHIFT_EXPR
6365 || code == LROTATE_EXPR
6366 || code == RROTATE_EXPR)
6367 return false;
6369 /* Comparisons are handled in vectorizable_comparison. */
6370 if (TREE_CODE_CLASS (code) == tcc_comparison)
6371 return false;
6373 /* Conditions are handled in vectorizable_condition. */
6374 if (code == COND_EXPR)
6375 return false;
6377 /* For pointer addition and subtraction, we should use the normal
6378 plus and minus for the vector operation. */
6379 if (code == POINTER_PLUS_EXPR)
6380 code = PLUS_EXPR;
6381 if (code == POINTER_DIFF_EXPR)
6382 code = MINUS_EXPR;
6384 /* Support only unary or binary operations. */
6385 op_type = TREE_CODE_LENGTH (code);
6386 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6390 "num. args = %d (not unary/binary/ternary op).\n",
6391 op_type);
6392 return false;
6395 scalar_dest = gimple_assign_lhs (stmt);
6396 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6398 /* Most operations cannot handle bit-precision types without extra
6399 truncations. */
6400 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6401 if (!mask_op_p
6402 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6403 /* Exception are bitwise binary operations. */
6404 && code != BIT_IOR_EXPR
6405 && code != BIT_XOR_EXPR
6406 && code != BIT_AND_EXPR)
6408 if (dump_enabled_p ())
6409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6410 "bit-precision arithmetic not supported.\n");
6411 return false;
6414 slp_tree slp_op0;
6415 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6416 0, &op0, &slp_op0, &dt[0], &vectype))
6418 if (dump_enabled_p ())
6419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6420 "use not simple.\n");
6421 return false;
6423 bool is_invariant = (dt[0] == vect_external_def
6424 || dt[0] == vect_constant_def);
6425 /* If op0 is an external or constant def, infer the vector type
6426 from the scalar type. */
6427 if (!vectype)
6429 /* For a boolean type we cannot determine the vectype from an
6430 invariant value (we don't know whether it is a vector
6431 of booleans or a vector of integers). We use the output
6432 vectype because operations on booleans don't change
6433 the type. */
6434 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6436 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6438 if (dump_enabled_p ())
6439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6440 "not supported operation on bool value.\n");
6441 return false;
6443 vectype = vectype_out;
6445 else
6446 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6447 slp_node);
6449 if (vec_stmt)
6450 gcc_assert (vectype);
6451 if (!vectype)
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6455 "no vectype for scalar type %T\n",
6456 TREE_TYPE (op0));
6458 return false;
6461 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6462 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6463 if (maybe_ne (nunits_out, nunits_in))
6464 return false;
6466 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6467 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6468 if (op_type == binary_op || op_type == ternary_op)
6470 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6471 1, &op1, &slp_op1, &dt[1], &vectype2))
6473 if (dump_enabled_p ())
6474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6475 "use not simple.\n");
6476 return false;
6478 is_invariant &= (dt[1] == vect_external_def
6479 || dt[1] == vect_constant_def);
6480 if (vectype2
6481 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6482 return false;
6484 if (op_type == ternary_op)
6486 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6487 2, &op2, &slp_op2, &dt[2], &vectype3))
6489 if (dump_enabled_p ())
6490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6491 "use not simple.\n");
6492 return false;
6494 is_invariant &= (dt[2] == vect_external_def
6495 || dt[2] == vect_constant_def);
6496 if (vectype3
6497 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6498 return false;
6501 /* Multiple types in SLP are handled by creating the appropriate number of
6502 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6503 case of SLP. */
6504 if (slp_node)
6506 ncopies = 1;
6507 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6509 else
6511 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6512 vec_num = 1;
6515 gcc_assert (ncopies >= 1);
6517 /* Reject attempts to combine mask types with nonmask types, e.g. if
6518 we have an AND between a (nonmask) boolean loaded from memory and
6519 a (mask) boolean result of a comparison.
6521 TODO: We could easily fix these cases up using pattern statements. */
6522 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6523 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6524 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6526 if (dump_enabled_p ())
6527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6528 "mixed mask and nonmask vector types\n");
6529 return false;
6532 /* Supportable by target? */
6534 vec_mode = TYPE_MODE (vectype);
6535 if (code == MULT_HIGHPART_EXPR)
6536 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6537 else
6539 optab = optab_for_tree_code (code, vectype, optab_default);
6540 if (!optab)
6542 if (dump_enabled_p ())
6543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6544 "no optab.\n");
6545 return false;
6547 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6548 || optab_libfunc (optab, vec_mode));
6551 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6552 if (!target_support_p || using_emulated_vectors_p)
6554 if (dump_enabled_p ())
6555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6556 "op not supported by target.\n");
6557 /* When vec_mode is not a vector mode and we verified ops we
6558 do not have to lower like AND are natively supported let
6559 those through even when the mode isn't word_mode. For
6560 ops we have to lower the lowering code assumes we are
6561 dealing with word_mode. */
6562 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6563 || !target_support_p)
6564 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6565 /* Check only during analysis. */
6566 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6568 if (dump_enabled_p ())
6569 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6570 return false;
6572 if (dump_enabled_p ())
6573 dump_printf_loc (MSG_NOTE, vect_location,
6574 "proceeding using word mode.\n");
6575 using_emulated_vectors_p = true;
6578 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6579 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6580 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6581 internal_fn cond_fn = get_conditional_internal_fn (code);
6582 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6584 /* If operating on inactive elements could generate spurious traps,
6585 we need to restrict the operation to active lanes. Note that this
6586 specifically doesn't apply to unhoisted invariants, since they
6587 operate on the same value for every lane.
6589 Similarly, if this operation is part of a reduction, a fully-masked
6590 loop should only change the active lanes of the reduction chain,
6591 keeping the inactive lanes as-is. */
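/* E.g. an integer division in a fully-masked loop could trap on an
   inactive lane whose divisor happens to be zero, so it must only be
   performed on the active lanes via a conditional internal function.  */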
6592 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6593 || reduc_idx >= 0);
6595 if (!vec_stmt) /* transformation not required. */
6597 if (loop_vinfo
6598 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6599 && mask_out_inactive)
6601 if (cond_len_fn != IFN_LAST
6602 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6603 OPTIMIZE_FOR_SPEED))
6604 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6606 else if (cond_fn != IFN_LAST
6607 && direct_internal_fn_supported_p (cond_fn, vectype,
6608 OPTIMIZE_FOR_SPEED))
6609 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6610 vectype, NULL);
6611 else
6613 if (dump_enabled_p ())
6614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6615 "can't use a fully-masked loop because no"
6616 " conditional operation is available.\n");
6617 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6621 /* Put types on constant and invariant SLP children. */
6622 if (slp_node
6623 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6624 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6625 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6627 if (dump_enabled_p ())
6628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6629 "incompatible vector types for invariants\n");
6630 return false;
6633 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6634 DUMP_VECT_SCOPE ("vectorizable_operation");
6635 vect_model_simple_cost (vinfo, stmt_info,
6636 ncopies, dt, ndts, slp_node, cost_vec);
6637 if (using_emulated_vectors_p)
6639 /* The above vect_model_simple_cost call handles constants
6640 in the prologue and (mis-)costs one of the stmts as
6641 vector stmt. See below for the actual lowering that will
6642 be applied. */
6643 unsigned n
6644 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6645 switch (code)
6647 case PLUS_EXPR:
6648 n *= 5;
6649 break;
6650 case MINUS_EXPR:
6651 n *= 6;
6652 break;
6653 case NEGATE_EXPR:
6654 n *= 4;
6655 break;
6656 default:
6657 /* Bit operations do not have extra cost and are accounted
6658 as vector stmt by vect_model_simple_cost. */
6659 n = 0;
6660 break;
6662 if (n != 0)
6664 /* We also need to materialize two large constants. */
6665 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6666 0, vect_prologue);
6667 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6668 0, vect_body);
6671 return true;
6674 /* Transform. */
6676 if (dump_enabled_p ())
6677 dump_printf_loc (MSG_NOTE, vect_location,
6678 "transform binary/unary operation.\n");
6680 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6681 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6683 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6684 vectors with unsigned elements, but the result is signed. So, we
6685 need to compute the MINUS_EXPR into a vectype temporary and
6686 VIEW_CONVERT_EXPR it into the final vectype_out result. */
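/* E.g. for 64-bit pointers, p - q is computed as a subtraction on vectors
   of unsigned 64-bit elements and the result is then VIEW_CONVERT_EXPRed
   to the signed result vector type.  */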
6687 tree vec_cvt_dest = NULL_TREE;
6688 if (orig_code == POINTER_DIFF_EXPR)
6690 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6691 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6693 /* Handle def. */
6694 else
6695 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6697 /* In case the vectorization factor (VF) is bigger than the number
6698 of elements that we can fit in a vectype (nunits), we have to generate
6699 more than one vector stmt, i.e., we need to "unroll" the
6700 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6701 from one copy of the vector stmt to the next, in the field
6702 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6703 stages to find the correct vector defs to be used when vectorizing
6704 stmts that use the defs of the current stmt. The example below
6705 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6706 we need to create 4 vectorized stmts):
6708 before vectorization:
6709 RELATED_STMT VEC_STMT
6710 S1: x = memref - -
6711 S2: z = x + 1 - -
6713 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6714 there):
6715 RELATED_STMT VEC_STMT
6716 VS1_0: vx0 = memref0 VS1_1 -
6717 VS1_1: vx1 = memref1 VS1_2 -
6718 VS1_2: vx2 = memref2 VS1_3 -
6719 VS1_3: vx3 = memref3 - -
6720 S1: x = load - VS1_0
6721 S2: z = x + 1 - -
6723 step2: vectorize stmt S2 (done here):
6724 To vectorize stmt S2 we first need to find the relevant vector
6725 def for the first operand 'x'. This is, as usual, obtained from
6726 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6727 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6728 relevant vector def 'vx0'. Having found 'vx0' we can generate
6729 the vector stmt VS2_0, and as usual, record it in the
6730 STMT_VINFO_VEC_STMT of stmt S2.
6731 When creating the second copy (VS2_1), we obtain the relevant vector
6732 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6733 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6734 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6735 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6736 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6737 chain of stmts and pointers:
6738 RELATED_STMT VEC_STMT
6739 VS1_0: vx0 = memref0 VS1_1 -
6740 VS1_1: vx1 = memref1 VS1_2 -
6741 VS1_2: vx2 = memref2 VS1_3 -
6742 VS1_3: vx3 = memref3 - -
6743 S1: x = load - VS1_0
6744 VS2_0: vz0 = vx0 + v1 VS2_1 -
6745 VS2_1: vz1 = vx1 + v1 VS2_2 -
6746 VS2_2: vz2 = vx2 + v1 VS2_3 -
6747 VS2_3: vz3 = vx3 + v1 - -
6748 S2: z = x + 1 - VS2_0 */
6750 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6751 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6752 /* Arguments are ready. Create the new vector stmt. */
6753 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6755 gimple *new_stmt = NULL;
6756 vop1 = ((op_type == binary_op || op_type == ternary_op)
6757 ? vec_oprnds1[i] : NULL_TREE);
6758 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6759 if (using_emulated_vectors_p
6760 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6762 /* Lower the operation. This follows vector lowering. */
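/* Roughly: the vector is reinterpreted as one word_mode integer, the most
   significant bit of each element is adjusted so that carries and borrows
   cannot propagate across element boundaries, the operation is carried
   out on the remaining bits with a single word_mode instruction, and the
   per-element sign bits are then patched back in via BIT_XOR_EXPR with
   HIGH_BITS.  */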
6763 unsigned int width = vector_element_bits (vectype);
6764 tree inner_type = TREE_TYPE (vectype);
6765 tree word_type
6766 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6767 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6768 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6769 tree high_bits
6770 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6771 tree wvop0 = make_ssa_name (word_type);
6772 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6773 build1 (VIEW_CONVERT_EXPR,
6774 word_type, vop0));
6775 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6776 tree result_low, signs;
6777 if (code == PLUS_EXPR || code == MINUS_EXPR)
6779 tree wvop1 = make_ssa_name (word_type);
6780 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6781 build1 (VIEW_CONVERT_EXPR,
6782 word_type, vop1));
6783 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6784 signs = make_ssa_name (word_type);
6785 new_stmt = gimple_build_assign (signs,
6786 BIT_XOR_EXPR, wvop0, wvop1);
6787 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6788 tree b_low = make_ssa_name (word_type);
6789 new_stmt = gimple_build_assign (b_low,
6790 BIT_AND_EXPR, wvop1, low_bits);
6791 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6792 tree a_low = make_ssa_name (word_type);
6793 if (code == PLUS_EXPR)
6794 new_stmt = gimple_build_assign (a_low,
6795 BIT_AND_EXPR, wvop0, low_bits);
6796 else
6797 new_stmt = gimple_build_assign (a_low,
6798 BIT_IOR_EXPR, wvop0, high_bits);
6799 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6800 if (code == MINUS_EXPR)
6802 new_stmt = gimple_build_assign (NULL_TREE,
6803 BIT_NOT_EXPR, signs);
6804 signs = make_ssa_name (word_type);
6805 gimple_assign_set_lhs (new_stmt, signs);
6806 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6808 new_stmt = gimple_build_assign (NULL_TREE,
6809 BIT_AND_EXPR, signs, high_bits);
6810 signs = make_ssa_name (word_type);
6811 gimple_assign_set_lhs (new_stmt, signs);
6812 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6813 result_low = make_ssa_name (word_type);
6814 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6815 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6817 else
6819 tree a_low = make_ssa_name (word_type);
6820 new_stmt = gimple_build_assign (a_low,
6821 BIT_AND_EXPR, wvop0, low_bits);
6822 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6823 signs = make_ssa_name (word_type);
6824 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6825 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6826 new_stmt = gimple_build_assign (NULL_TREE,
6827 BIT_AND_EXPR, signs, high_bits);
6828 signs = make_ssa_name (word_type);
6829 gimple_assign_set_lhs (new_stmt, signs);
6830 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6831 result_low = make_ssa_name (word_type);
6832 new_stmt = gimple_build_assign (result_low,
6833 MINUS_EXPR, high_bits, a_low);
6834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6836 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6837 signs);
6838 result_low = make_ssa_name (word_type);
6839 gimple_assign_set_lhs (new_stmt, result_low);
6840 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6841 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6842 build1 (VIEW_CONVERT_EXPR,
6843 vectype, result_low));
6844 new_temp = make_ssa_name (vectype);
6845 gimple_assign_set_lhs (new_stmt, new_temp);
6846 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6848 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
6850 tree mask;
6851 if (masked_loop_p)
6852 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
6853 vec_num * ncopies, vectype, i);
6854 else
6855 /* Dummy mask. */
6856 mask = build_minus_one_cst (truth_type_for (vectype));
6857 auto_vec<tree> vops (6);
6858 vops.quick_push (mask);
6859 vops.quick_push (vop0);
6860 if (vop1)
6861 vops.quick_push (vop1);
6862 if (vop2)
6863 vops.quick_push (vop2);
6864 if (reduc_idx >= 0)
6866 /* Perform the operation on active elements only and take
6867 inactive elements from the reduction chain input. */
6868 gcc_assert (!vop2);
6869 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6871 else
6873 auto else_value = targetm.preferred_else_value
6874 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6875 vops.quick_push (else_value);
6877 if (len_loop_p)
6879 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
6880 vec_num * ncopies, vectype, i, 1);
6881 signed char biasval
6882 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
6883 tree bias = build_int_cst (intQI_type_node, biasval);
6884 vops.quick_push (len);
6885 vops.quick_push (bias);
6887 gcall *call
6888 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
6889 : cond_len_fn,
6890 vops);
6891 new_temp = make_ssa_name (vec_dest, call);
6892 gimple_call_set_lhs (call, new_temp);
6893 gimple_call_set_nothrow (call, true);
6894 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6895 new_stmt = call;
6897 else
6899 tree mask = NULL_TREE;
6900 /* When combining two masks, check whether either of them is elsewhere
6901 combined with a loop mask; if so, we can mark that the
6902 new combined mask doesn't need to be combined with a loop mask. */
6903 if (masked_loop_p
6904 && code == BIT_AND_EXPR
6905 && VECTOR_BOOLEAN_TYPE_P (vectype))
6907 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6908 ncopies}))
6910 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
6911 vec_num * ncopies, vectype, i);
6913 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6914 vop0, gsi);
6917 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6918 ncopies }))
6920 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
6921 vec_num * ncopies, vectype, i);
6923 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6924 vop1, gsi);
6928 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6929 new_temp = make_ssa_name (vec_dest, new_stmt);
6930 gimple_assign_set_lhs (new_stmt, new_temp);
6931 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6932 if (using_emulated_vectors_p)
6933 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6935 /* Enter the combined value into the vector cond hash so we don't
6936 AND it with a loop mask again. */
6937 if (mask)
6938 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6941 if (vec_cvt_dest)
6943 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6944 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6945 new_temp);
6946 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6947 gimple_assign_set_lhs (new_stmt, new_temp);
6948 vect_finish_stmt_generation (vinfo, stmt_info,
6949 new_stmt, gsi);
6952 if (slp_node)
6953 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6954 else
6955 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6958 if (!slp_node)
6959 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6961 vec_oprnds0.release ();
6962 vec_oprnds1.release ();
6963 vec_oprnds2.release ();
6965 return true;
6968 /* A helper function to ensure data reference DR_INFO's base alignment. */
6970 static void
6971 ensure_base_align (dr_vec_info *dr_info)
6973 /* Alignment is only analyzed for the first element of a DR group,
6974 so use that to determine the base alignment we need to enforce. */
6975 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6976 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6978 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6980 if (dr_info->base_misaligned)
6982 tree base_decl = dr_info->base_decl;
6984 // We should only be able to increase the alignment of a base object if
6985 // we know what its new alignment should be at compile time.
6986 unsigned HOST_WIDE_INT align_base_to =
6987 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6989 if (decl_in_symtab_p (base_decl))
6990 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6991 else if (DECL_ALIGN (base_decl) < align_base_to)
6993 SET_DECL_ALIGN (base_decl, align_base_to);
6994 DECL_USER_ALIGN (base_decl) = 1;
6996 dr_info->base_misaligned = false;
7001 /* Function get_group_alias_ptr_type.
7003 Return the alias type for the group starting at FIRST_STMT_INFO. */
7005 static tree
7006 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7008 struct data_reference *first_dr, *next_dr;
7010 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7011 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7012 while (next_stmt_info)
7014 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7015 if (get_alias_set (DR_REF (first_dr))
7016 != get_alias_set (DR_REF (next_dr)))
7018 if (dump_enabled_p ())
7019 dump_printf_loc (MSG_NOTE, vect_location,
7020 "conflicting alias set types.\n");
7021 return ptr_type_node;
7023 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7025 return reference_alias_ptr_type (DR_REF (first_dr));
7029 /* Function scan_operand_equal_p.
7031 Helper function for check_scan_store. Compare two references
7032 with .GOMP_SIMD_LANE bases. */
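/* Editor's note: "equal" here is intentionally loose.  The two references
   are treated as equal when get_inner_reference gives them the same base
   and bit size (with zero bit position) and their offsets match after the
   code below peels off a "&array p+ lane" pointer arithmetic base, a
   constant multiplication (the step) and any widening integral
   conversion.  */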
7034 static bool
7035 scan_operand_equal_p (tree ref1, tree ref2)
7037 tree ref[2] = { ref1, ref2 };
7038 poly_int64 bitsize[2], bitpos[2];
7039 tree offset[2], base[2];
7040 for (int i = 0; i < 2; ++i)
7042 machine_mode mode;
7043 int unsignedp, reversep, volatilep = 0;
7044 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7045 &offset[i], &mode, &unsignedp,
7046 &reversep, &volatilep);
7047 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7048 return false;
7049 if (TREE_CODE (base[i]) == MEM_REF
7050 && offset[i] == NULL_TREE
7051 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7053 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7054 if (is_gimple_assign (def_stmt)
7055 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7056 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7057 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7059 if (maybe_ne (mem_ref_offset (base[i]), 0))
7060 return false;
7061 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7062 offset[i] = gimple_assign_rhs2 (def_stmt);
7067 if (!operand_equal_p (base[0], base[1], 0))
7068 return false;
7069 if (maybe_ne (bitsize[0], bitsize[1]))
7070 return false;
7071 if (offset[0] != offset[1])
7073 if (!offset[0] || !offset[1])
7074 return false;
7075 if (!operand_equal_p (offset[0], offset[1], 0))
7077 tree step[2];
7078 for (int i = 0; i < 2; ++i)
7080 step[i] = integer_one_node;
7081 if (TREE_CODE (offset[i]) == SSA_NAME)
7083 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7084 if (is_gimple_assign (def_stmt)
7085 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7086 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7087 == INTEGER_CST))
7089 step[i] = gimple_assign_rhs2 (def_stmt);
7090 offset[i] = gimple_assign_rhs1 (def_stmt);
7093 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7095 step[i] = TREE_OPERAND (offset[i], 1);
7096 offset[i] = TREE_OPERAND (offset[i], 0);
7098 tree rhs1 = NULL_TREE;
7099 if (TREE_CODE (offset[i]) == SSA_NAME)
7101 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7102 if (gimple_assign_cast_p (def_stmt))
7103 rhs1 = gimple_assign_rhs1 (def_stmt);
7105 else if (CONVERT_EXPR_P (offset[i]))
7106 rhs1 = TREE_OPERAND (offset[i], 0);
7107 if (rhs1
7108 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7109 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7110 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7111 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7112 offset[i] = rhs1;
7114 if (!operand_equal_p (offset[0], offset[1], 0)
7115 || !operand_equal_p (step[0], step[1], 0))
7116 return false;
7119 return true;
7123 enum scan_store_kind {
7124 /* Normal permutation. */
7125 scan_store_kind_perm,
7127 /* Whole vector left shift permutation with zero init. */
7128 scan_store_kind_lshift_zero,
7130 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7131 scan_store_kind_lshift_cond
7134 /* Function scan_store_can_perm_p.
7136 Verify whether we can perform the needed permutations or whole vector shifts.
7137 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7138 USE_WHOLE_VECTOR, if nonnull, records which scan_store_kind operation
7139 to use at each step. */
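/* Editor's note (illustrative example, not from the original source): for a
   vectype with 8 units this returns 3.  The loop below checks the three
   scan permutations
     { 0, 8, 9, 10, 11, 12, 13, 14 }
     { 0, 1, 8, 9, 10, 11, 12, 13 }
     { 0, 1, 2, 3, 8, 9, 10, 11 }
   plus the final broadcast { 7, 7, 7, 7, 7, 7, 7, 7 }.  When one of the
   first three is not supported directly, it falls back to a whole vector
   shift (vec_shl), combined with a VEC_COND_EXPR if the initializer is not
   an all-zeros constant.  */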
7141 static int
7142 scan_store_can_perm_p (tree vectype, tree init,
7143 vec<enum scan_store_kind> *use_whole_vector = NULL)
7145 enum machine_mode vec_mode = TYPE_MODE (vectype);
7146 unsigned HOST_WIDE_INT nunits;
7147 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7148 return -1;
7149 int units_log2 = exact_log2 (nunits);
7150 if (units_log2 <= 0)
7151 return -1;
7153 int i;
7154 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7155 for (i = 0; i <= units_log2; ++i)
7157 unsigned HOST_WIDE_INT j, k;
7158 enum scan_store_kind kind = scan_store_kind_perm;
7159 vec_perm_builder sel (nunits, nunits, 1);
7160 sel.quick_grow (nunits);
7161 if (i == units_log2)
7163 for (j = 0; j < nunits; ++j)
7164 sel[j] = nunits - 1;
7166 else
7168 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7169 sel[j] = j;
7170 for (k = 0; j < nunits; ++j, ++k)
7171 sel[j] = nunits + k;
7173 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7174 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7176 if (i == units_log2)
7177 return -1;
7179 if (whole_vector_shift_kind == scan_store_kind_perm)
7181 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7182 return -1;
7183 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7184 /* Whole vector shifts shift in zeros, so if init is an all-zeros
7185 constant, there is no need to do anything further. */
7186 if ((TREE_CODE (init) != INTEGER_CST
7187 && TREE_CODE (init) != REAL_CST)
7188 || !initializer_zerop (init))
7190 tree masktype = truth_type_for (vectype);
7191 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7192 return -1;
7193 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7196 kind = whole_vector_shift_kind;
7198 if (use_whole_vector)
7200 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7201 use_whole_vector->safe_grow_cleared (i, true);
7202 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7203 use_whole_vector->safe_push (kind);
7207 return units_log2;
7211 /* Function check_scan_store.
7213 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7215 static bool
7216 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7217 enum vect_def_type rhs_dt, bool slp, tree mask,
7218 vect_memory_access_type memory_access_type)
7220 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7221 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7222 tree ref_type;
7224 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7225 if (slp
7226 || mask
7227 || memory_access_type != VMAT_CONTIGUOUS
7228 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7229 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7230 || loop_vinfo == NULL
7231 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7232 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7233 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7234 || !integer_zerop (DR_INIT (dr_info->dr))
7235 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7236 || !alias_sets_conflict_p (get_alias_set (vectype),
7237 get_alias_set (TREE_TYPE (ref_type))))
7239 if (dump_enabled_p ())
7240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7241 "unsupported OpenMP scan store.\n");
7242 return false;
7245 /* We need to pattern match code built by OpenMP lowering and simplified
7246 by subsequent optimizations into something we can handle.
7247 #pragma omp simd reduction(inscan,+:r)
7248 for (...)
7250 r += something ();
7251 #pragma omp scan inclusive (r)
7252 use (r);
7254 shall have body with:
7255 // Initialization for input phase, store the reduction initializer:
7256 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7257 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7258 D.2042[_21] = 0;
7259 // Actual input phase:
7261 r.0_5 = D.2042[_20];
7262 _6 = _4 + r.0_5;
7263 D.2042[_20] = _6;
7264 // Initialization for scan phase:
7265 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7266 _26 = D.2043[_25];
7267 _27 = D.2042[_25];
7268 _28 = _26 + _27;
7269 D.2043[_25] = _28;
7270 D.2042[_25] = _28;
7271 // Actual scan phase:
7273 r.1_8 = D.2042[_20];
7275 The "omp simd array" variable D.2042 holds the privatized copy used
7276 inside of the loop and D.2043 is another one that holds copies of
7277 the current original list item. The separate GOMP_SIMD_LANE ifn
7278 kinds are there in order to allow optimizing the initializer store
7279 and combiner sequence, e.g. if it is originally some C++ish user
7280 defined reduction, while still allowing the vectorizer to pattern recognize it
7281 and turn it into the appropriate vectorized scan.
7283 For exclusive scan, this is slightly different:
7284 #pragma omp simd reduction(inscan,+:r)
7285 for (...)
7287 use (r);
7288 #pragma omp scan exclusive (r)
7289 r += something ();
7291 shall have body with:
7292 // Initialization for input phase, store the reduction initializer:
7293 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7294 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7295 D.2042[_21] = 0;
7296 // Actual input phase:
7298 r.0_5 = D.2042[_20];
7299 _6 = _4 + r.0_5;
7300 D.2042[_20] = _6;
7301 // Initialization for scan phase:
7302 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7303 _26 = D.2043[_25];
7304 D.2044[_25] = _26;
7305 _27 = D.2042[_25];
7306 _28 = _26 + _27;
7307 D.2043[_25] = _28;
7308 // Actual scan phase:
7310 r.1_8 = D.2044[_20];
7311 ... */
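  /* Editor's note: as the IL above suggests, STMT_VINFO_SIMD_LANE_ACCESS_P
     discriminates the stores by the second .GOMP_SIMD_LANE argument:
     2 is the reduction-initializer store (D.2042[_21] = 0 above), 3 a store
     in an inclusive scan and 4 a store in an exclusive scan.  */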
7313 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7315 /* Match the D.2042[_21] = 0; store above. Just require that
7316 it is a constant or external definition store. */
7317 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7319 fail_init:
7320 if (dump_enabled_p ())
7321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7322 "unsupported OpenMP scan initializer store.\n");
7323 return false;
7326 if (! loop_vinfo->scan_map)
7327 loop_vinfo->scan_map = new hash_map<tree, tree>;
7328 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7329 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7330 if (cached)
7331 goto fail_init;
7332 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7334 /* These stores can be vectorized normally. */
7335 return true;
7338 if (rhs_dt != vect_internal_def)
7340 fail:
7341 if (dump_enabled_p ())
7342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7343 "unsupported OpenMP scan combiner pattern.\n");
7344 return false;
7347 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7348 tree rhs = gimple_assign_rhs1 (stmt);
7349 if (TREE_CODE (rhs) != SSA_NAME)
7350 goto fail;
7352 gimple *other_store_stmt = NULL;
7353 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7354 bool inscan_var_store
7355 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7357 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7359 if (!inscan_var_store)
7361 use_operand_p use_p;
7362 imm_use_iterator iter;
7363 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7365 gimple *use_stmt = USE_STMT (use_p);
7366 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7367 continue;
7368 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7369 || !is_gimple_assign (use_stmt)
7370 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7371 || other_store_stmt
7372 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7373 goto fail;
7374 other_store_stmt = use_stmt;
7376 if (other_store_stmt == NULL)
7377 goto fail;
7378 rhs = gimple_assign_lhs (other_store_stmt);
7379 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7380 goto fail;
7383 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7385 use_operand_p use_p;
7386 imm_use_iterator iter;
7387 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7389 gimple *use_stmt = USE_STMT (use_p);
7390 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7391 continue;
7392 if (other_store_stmt)
7393 goto fail;
7394 other_store_stmt = use_stmt;
7397 else
7398 goto fail;
7400 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7401 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7402 || !is_gimple_assign (def_stmt)
7403 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7404 goto fail;
7406 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7407 /* For pointer addition, we should use the normal plus for the vector
7408 operation. */
7409 switch (code)
7411 case POINTER_PLUS_EXPR:
7412 code = PLUS_EXPR;
7413 break;
7414 case MULT_HIGHPART_EXPR:
7415 goto fail;
7416 default:
7417 break;
7419 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7420 goto fail;
7422 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7423 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7424 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7425 goto fail;
7427 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7428 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7429 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7430 || !gimple_assign_load_p (load1_stmt)
7431 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7432 || !gimple_assign_load_p (load2_stmt))
7433 goto fail;
7435 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7436 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7437 if (load1_stmt_info == NULL
7438 || load2_stmt_info == NULL
7439 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7440 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7441 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7442 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7443 goto fail;
7445 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7447 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7448 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7449 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7450 goto fail;
7451 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7452 tree lrhs;
7453 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7454 lrhs = rhs1;
7455 else
7456 lrhs = rhs2;
7457 use_operand_p use_p;
7458 imm_use_iterator iter;
7459 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7461 gimple *use_stmt = USE_STMT (use_p);
7462 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7463 continue;
7464 if (other_store_stmt)
7465 goto fail;
7466 other_store_stmt = use_stmt;
7470 if (other_store_stmt == NULL)
7471 goto fail;
7472 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7473 || !gimple_store_p (other_store_stmt))
7474 goto fail;
7476 stmt_vec_info other_store_stmt_info
7477 = loop_vinfo->lookup_stmt (other_store_stmt);
7478 if (other_store_stmt_info == NULL
7479 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7480 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7481 goto fail;
7483 gimple *stmt1 = stmt;
7484 gimple *stmt2 = other_store_stmt;
7485 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7486 std::swap (stmt1, stmt2);
7487 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7488 gimple_assign_rhs1 (load2_stmt)))
7490 std::swap (rhs1, rhs2);
7491 std::swap (load1_stmt, load2_stmt);
7492 std::swap (load1_stmt_info, load2_stmt_info);
7494 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7495 gimple_assign_rhs1 (load1_stmt)))
7496 goto fail;
7498 tree var3 = NULL_TREE;
7499 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7500 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7501 gimple_assign_rhs1 (load2_stmt)))
7502 goto fail;
7503 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7505 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7506 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7507 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7508 goto fail;
7509 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7510 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7511 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7512 || lookup_attribute ("omp simd inscan exclusive",
7513 DECL_ATTRIBUTES (var3)))
7514 goto fail;
7517 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7518 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7519 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7520 goto fail;
7522 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7523 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7524 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7525 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7526 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7527 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7528 goto fail;
7530 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7531 std::swap (var1, var2);
7533 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7535 if (!lookup_attribute ("omp simd inscan exclusive",
7536 DECL_ATTRIBUTES (var1)))
7537 goto fail;
7538 var1 = var3;
7541 if (loop_vinfo->scan_map == NULL)
7542 goto fail;
7543 tree *init = loop_vinfo->scan_map->get (var1);
7544 if (init == NULL)
7545 goto fail;
7547 /* The IL is as expected; now check if we can actually vectorize it.
7548 Inclusive scan:
7549 _26 = D.2043[_25];
7550 _27 = D.2042[_25];
7551 _28 = _26 + _27;
7552 D.2043[_25] = _28;
7553 D.2042[_25] = _28;
7554 should be vectorized as (where _40 is the vectorized rhs
7555 from the D.2042[_21] = 0; store):
7556 _30 = MEM <vector(8) int> [(int *)&D.2043];
7557 _31 = MEM <vector(8) int> [(int *)&D.2042];
7558 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7559 _33 = _31 + _32;
7560 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7561 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7562 _35 = _33 + _34;
7563 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7564 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7565 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7566 _37 = _35 + _36;
7567 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7568 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7569 _38 = _30 + _37;
7570 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7571 MEM <vector(8) int> [(int *)&D.2043] = _39;
7572 MEM <vector(8) int> [(int *)&D.2042] = _38;
7573 Exclusive scan:
7574 _26 = D.2043[_25];
7575 D.2044[_25] = _26;
7576 _27 = D.2042[_25];
7577 _28 = _26 + _27;
7578 D.2043[_25] = _28;
7579 should be vectorized as (where _40 is the vectorized rhs
7580 from the D.2042[_21] = 0; store):
7581 _30 = MEM <vector(8) int> [(int *)&D.2043];
7582 _31 = MEM <vector(8) int> [(int *)&D.2042];
7583 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7584 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7585 _34 = _32 + _33;
7586 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7587 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7588 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7589 _36 = _34 + _35;
7590 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7591 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7592 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7593 _38 = _36 + _37;
7594 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7595 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7596 _39 = _30 + _38;
7597 _50 = _31 + _39;
7598 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7599 MEM <vector(8) int> [(int *)&D.2044] = _39;
7600 MEM <vector(8) int> [(int *)&D.2042] = _51; */
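  /* Editor's note: in the sketches above nunits is 8, so there are
     exact_log2 (8) == 3 permute-and-add steps; each step doubles the length
     of the partial sums, which is why scan_store_can_perm_p returns the
     exact log2 of the number of vector elements.  */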
7601 enum machine_mode vec_mode = TYPE_MODE (vectype);
7602 optab optab = optab_for_tree_code (code, vectype, optab_default);
7603 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7604 goto fail;
7606 int units_log2 = scan_store_can_perm_p (vectype, *init);
7607 if (units_log2 == -1)
7608 goto fail;
7610 return true;
7614 /* Function vectorizable_scan_store.
7616 Helper of vectorizable_store; arguments are as for vectorizable_store.
7617 Handle only the transformation, checking is done in check_scan_store. */
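/* Editor's note: the transformation below emits the sequences sketched near
   the end of check_scan_store above: a chain of VEC_PERM_EXPR plus addition
   steps, an optional VEC_COND_EXPR fixup where a whole-vector shift had to
   be used with a non-zero initializer, and a final broadcast of the last
   element (the { 7, 7, ... } permutation in the sketches).  */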
7619 static bool
7620 vectorizable_scan_store (vec_info *vinfo,
7621 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7622 gimple **vec_stmt, int ncopies)
7624 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7625 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7626 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7627 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7629 if (dump_enabled_p ())
7630 dump_printf_loc (MSG_NOTE, vect_location,
7631 "transform scan store. ncopies = %d\n", ncopies);
7633 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7634 tree rhs = gimple_assign_rhs1 (stmt);
7635 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7637 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7638 bool inscan_var_store
7639 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7641 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7643 use_operand_p use_p;
7644 imm_use_iterator iter;
7645 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7647 gimple *use_stmt = USE_STMT (use_p);
7648 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7649 continue;
7650 rhs = gimple_assign_lhs (use_stmt);
7651 break;
7655 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7656 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7657 if (code == POINTER_PLUS_EXPR)
7658 code = PLUS_EXPR;
7659 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7660 && commutative_tree_code (code));
7661 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7662 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7663 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7664 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7665 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7666 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7667 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7668 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7669 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7670 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7671 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7673 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7675 std::swap (rhs1, rhs2);
7676 std::swap (var1, var2);
7677 std::swap (load1_dr_info, load2_dr_info);
7680 tree *init = loop_vinfo->scan_map->get (var1);
7681 gcc_assert (init);
7683 unsigned HOST_WIDE_INT nunits;
7684 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7685 gcc_unreachable ();
7686 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7687 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7688 gcc_assert (units_log2 > 0);
7689 auto_vec<tree, 16> perms;
7690 perms.quick_grow (units_log2 + 1);
7691 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7692 for (int i = 0; i <= units_log2; ++i)
7694 unsigned HOST_WIDE_INT j, k;
7695 vec_perm_builder sel (nunits, nunits, 1);
7696 sel.quick_grow (nunits);
7697 if (i == units_log2)
7698 for (j = 0; j < nunits; ++j)
7699 sel[j] = nunits - 1;
7700 else
7702 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7703 sel[j] = j;
7704 for (k = 0; j < nunits; ++j, ++k)
7705 sel[j] = nunits + k;
7707 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7708 if (!use_whole_vector.is_empty ()
7709 && use_whole_vector[i] != scan_store_kind_perm)
7711 if (zero_vec == NULL_TREE)
7712 zero_vec = build_zero_cst (vectype);
7713 if (masktype == NULL_TREE
7714 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7715 masktype = truth_type_for (vectype);
7716 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7718 else
7719 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7722 tree vec_oprnd1 = NULL_TREE;
7723 tree vec_oprnd2 = NULL_TREE;
7724 tree vec_oprnd3 = NULL_TREE;
7725 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7726 tree dataref_offset = build_int_cst (ref_type, 0);
7727 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7728 vectype, VMAT_CONTIGUOUS);
7729 tree ldataref_ptr = NULL_TREE;
7730 tree orig = NULL_TREE;
7731 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7732 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7733 auto_vec<tree> vec_oprnds1;
7734 auto_vec<tree> vec_oprnds2;
7735 auto_vec<tree> vec_oprnds3;
7736 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7737 *init, &vec_oprnds1,
7738 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7739 rhs2, &vec_oprnds3);
7740 for (int j = 0; j < ncopies; j++)
7742 vec_oprnd1 = vec_oprnds1[j];
7743 if (ldataref_ptr == NULL)
7744 vec_oprnd2 = vec_oprnds2[j];
7745 vec_oprnd3 = vec_oprnds3[j];
7746 if (j == 0)
7747 orig = vec_oprnd3;
7748 else if (!inscan_var_store)
7749 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7751 if (ldataref_ptr)
7753 vec_oprnd2 = make_ssa_name (vectype);
7754 tree data_ref = fold_build2 (MEM_REF, vectype,
7755 unshare_expr (ldataref_ptr),
7756 dataref_offset);
7757 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7758 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7759 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7760 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7761 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7764 tree v = vec_oprnd2;
7765 for (int i = 0; i < units_log2; ++i)
7767 tree new_temp = make_ssa_name (vectype);
7768 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7769 (zero_vec
7770 && (use_whole_vector[i]
7771 != scan_store_kind_perm))
7772 ? zero_vec : vec_oprnd1, v,
7773 perms[i]);
7774 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7775 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7776 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7778 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7780 /* The whole-vector shift shifted in zero bits, so if *init
7781 is not initializer_zerop, we need to replace those zeroed elements
7782 with the corresponding elements from vec_oprnd1. */
7783 tree_vector_builder vb (masktype, nunits, 1);
7784 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7785 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7786 ? boolean_false_node : boolean_true_node);
7788 tree new_temp2 = make_ssa_name (vectype);
7789 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7790 new_temp, vec_oprnd1);
7791 vect_finish_stmt_generation (vinfo, stmt_info,
7792 g, gsi);
7793 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7794 new_temp = new_temp2;
7797 /* For exclusive scan, perform the perms[i] permutation once
7798 more. */
7799 if (i == 0
7800 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7801 && v == vec_oprnd2)
7803 v = new_temp;
7804 --i;
7805 continue;
7808 tree new_temp2 = make_ssa_name (vectype);
7809 g = gimple_build_assign (new_temp2, code, v, new_temp);
7810 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7811 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7813 v = new_temp2;
7816 tree new_temp = make_ssa_name (vectype);
7817 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7818 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7819 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7821 tree last_perm_arg = new_temp;
7822 /* For exclusive scan, new_temp computed above is the exclusive scan
7823 prefix sum. Turn it into inclusive prefix sum for the broadcast
7824 of the last element into orig. */
7825 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7827 last_perm_arg = make_ssa_name (vectype);
7828 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7829 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7830 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7833 orig = make_ssa_name (vectype);
7834 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7835 last_perm_arg, perms[units_log2]);
7836 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7837 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7839 if (!inscan_var_store)
7841 tree data_ref = fold_build2 (MEM_REF, vectype,
7842 unshare_expr (dataref_ptr),
7843 dataref_offset);
7844 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7845 g = gimple_build_assign (data_ref, new_temp);
7846 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7847 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7851 if (inscan_var_store)
7852 for (int j = 0; j < ncopies; j++)
7854 if (j != 0)
7855 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7857 tree data_ref = fold_build2 (MEM_REF, vectype,
7858 unshare_expr (dataref_ptr),
7859 dataref_offset);
7860 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7861 gimple *g = gimple_build_assign (data_ref, orig);
7862 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7863 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7865 return true;
7869 /* Function vectorizable_store.
7871 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7872 that can be vectorized.
7873 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7874 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7875 Return true if STMT_INFO is vectorizable in this way. */
7877 static bool
7878 vectorizable_store (vec_info *vinfo,
7879 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7880 gimple **vec_stmt, slp_tree slp_node,
7881 stmt_vector_for_cost *cost_vec)
7883 tree data_ref;
7884 tree op;
7885 tree vec_oprnd = NULL_TREE;
7886 tree elem_type;
7887 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7888 class loop *loop = NULL;
7889 machine_mode vec_mode;
7890 tree dummy;
7891 enum vect_def_type rhs_dt = vect_unknown_def_type;
7892 enum vect_def_type mask_dt = vect_unknown_def_type;
7893 tree dataref_ptr = NULL_TREE;
7894 tree dataref_offset = NULL_TREE;
7895 gimple *ptr_incr = NULL;
7896 int ncopies;
7897 int j;
7898 stmt_vec_info first_stmt_info;
7899 bool grouped_store;
7900 unsigned int group_size, i;
7901 vec<tree> oprnds = vNULL;
7902 vec<tree> result_chain = vNULL;
7903 vec<tree> vec_oprnds = vNULL;
7904 bool slp = (slp_node != NULL);
7905 unsigned int vec_num;
7906 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7907 tree aggr_type;
7908 gather_scatter_info gs_info;
7909 poly_uint64 vf;
7910 vec_load_store_type vls_type;
7911 tree ref_type;
7913 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7914 return false;
7916 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7917 && ! vec_stmt)
7918 return false;
7920 /* Is vectorizable store? */
7922 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7923 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7925 tree scalar_dest = gimple_assign_lhs (assign);
7926 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7927 && is_pattern_stmt_p (stmt_info))
7928 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7929 if (TREE_CODE (scalar_dest) != ARRAY_REF
7930 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7931 && TREE_CODE (scalar_dest) != INDIRECT_REF
7932 && TREE_CODE (scalar_dest) != COMPONENT_REF
7933 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7934 && TREE_CODE (scalar_dest) != REALPART_EXPR
7935 && TREE_CODE (scalar_dest) != MEM_REF)
7936 return false;
7938 else
7940 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7941 if (!call || !gimple_call_internal_p (call))
7942 return false;
7944 internal_fn ifn = gimple_call_internal_fn (call);
7945 if (!internal_store_fn_p (ifn))
7946 return false;
7948 if (slp_node != NULL)
7950 if (dump_enabled_p ())
7951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7952 "SLP of masked stores not supported.\n");
7953 return false;
7956 int mask_index = internal_fn_mask_index (ifn);
7957 if (mask_index >= 0
7958 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7959 &mask, NULL, &mask_dt, &mask_vectype))
7960 return false;
7963 op = vect_get_store_rhs (stmt_info);
7965 /* Cannot have hybrid store SLP -- that would mean storing to the
7966 same location twice. */
7967 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7969 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7970 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7972 if (loop_vinfo)
7974 loop = LOOP_VINFO_LOOP (loop_vinfo);
7975 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7977 else
7978 vf = 1;
7980 /* Multiple types in SLP are handled by creating the appropriate number of
7981 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in the
7982 case of SLP. */
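  /* Editor's note: for SLP the required replication is instead expressed by
     the number of vector stmts of the SLP node (SLP_TREE_NUMBER_OF_VEC_STMTS,
     used as VEC_NUM further below), so NCOPIES does not need to scale with
     the vectorization factor.  */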
7983 if (slp)
7984 ncopies = 1;
7985 else
7986 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7988 gcc_assert (ncopies >= 1);
7990 /* FORNOW. This restriction should be relaxed. */
7991 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7993 if (dump_enabled_p ())
7994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7995 "multiple types in nested loop.\n");
7996 return false;
7999 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8000 op, &rhs_dt, &rhs_vectype, &vls_type))
8001 return false;
8003 elem_type = TREE_TYPE (vectype);
8004 vec_mode = TYPE_MODE (vectype);
8006 if (!STMT_VINFO_DATA_REF (stmt_info))
8007 return false;
8009 vect_memory_access_type memory_access_type;
8010 enum dr_alignment_support alignment_support_scheme;
8011 int misalignment;
8012 poly_int64 poffset;
8013 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8014 ncopies, &memory_access_type, &poffset,
8015 &alignment_support_scheme, &misalignment, &gs_info))
8016 return false;
8018 if (mask)
8020 if (memory_access_type == VMAT_CONTIGUOUS)
8022 if (!VECTOR_MODE_P (vec_mode)
8023 || !can_vec_mask_load_store_p (vec_mode,
8024 TYPE_MODE (mask_vectype), false))
8025 return false;
8027 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8028 && (memory_access_type != VMAT_GATHER_SCATTER
8029 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8031 if (dump_enabled_p ())
8032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8033 "unsupported access type for masked store.\n");
8034 return false;
8036 else if (memory_access_type == VMAT_GATHER_SCATTER
8037 && gs_info.ifn == IFN_LAST
8038 && !gs_info.decl)
8040 if (dump_enabled_p ())
8041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8042 "unsupported masked emulated scatter.\n");
8043 return false;
8046 else
8048 /* FORNOW. In some cases we can vectorize even if the data type is not
8049 supported (e.g. array initialization with 0). */
8050 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8051 return false;
8054 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8055 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8056 && memory_access_type != VMAT_GATHER_SCATTER
8057 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8058 if (grouped_store)
8060 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8061 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8062 group_size = DR_GROUP_SIZE (first_stmt_info);
8064 else
8066 first_stmt_info = stmt_info;
8067 first_dr_info = dr_info;
8068 group_size = vec_num = 1;
8071 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8073 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8074 memory_access_type))
8075 return false;
8078 if (!vec_stmt) /* transformation not required. */
8080 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8082 if (loop_vinfo
8083 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8084 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8085 vls_type, group_size,
8086 memory_access_type, &gs_info,
8087 mask);
8089 if (slp_node
8090 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8091 vectype))
8093 if (dump_enabled_p ())
8094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8095 "incompatible vector types for invariants\n");
8096 return false;
8099 if (dump_enabled_p ()
8100 && memory_access_type != VMAT_ELEMENTWISE
8101 && memory_access_type != VMAT_GATHER_SCATTER
8102 && alignment_support_scheme != dr_aligned)
8103 dump_printf_loc (MSG_NOTE, vect_location,
8104 "Vectorizing an unaligned access.\n");
8106 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8107 vect_model_store_cost (vinfo, stmt_info, ncopies,
8108 memory_access_type, &gs_info,
8109 alignment_support_scheme,
8110 misalignment, vls_type, slp_node, cost_vec);
8111 return true;
8113 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8115 /* Transform. */
8117 ensure_base_align (dr_info);
8119 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8121 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
8122 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
8123 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
8124 tree ptr, var, scale, vec_mask;
8125 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
8126 tree mask_halfvectype = mask_vectype;
8127 edge pe = loop_preheader_edge (loop);
8128 gimple_seq seq;
8129 basic_block new_bb;
8130 enum { NARROW, NONE, WIDEN } modifier;
8131 poly_uint64 scatter_off_nunits
8132 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
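      /* Editor's note: WIDEN below means the offset vector has twice as many
         elements as the data vector, so each copy consumes one half of an
         offset vector; NARROW is the opposite case, where each call stores
         one half of a data vector and NCOPIES is doubled.  */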
8134 if (known_eq (nunits, scatter_off_nunits))
8135 modifier = NONE;
8136 else if (known_eq (nunits * 2, scatter_off_nunits))
8138 modifier = WIDEN;
8140 /* Currently gathers and scatters are only supported for
8141 fixed-length vectors. */
8142 unsigned int count = scatter_off_nunits.to_constant ();
8143 vec_perm_builder sel (count, count, 1);
8144 for (i = 0; i < (unsigned int) count; ++i)
8145 sel.quick_push (i | (count / 2));
8147 vec_perm_indices indices (sel, 1, count);
8148 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
8149 indices);
8150 gcc_assert (perm_mask != NULL_TREE);
8152 else if (known_eq (nunits, scatter_off_nunits * 2))
8154 modifier = NARROW;
8156 /* Currently gathers and scatters are only supported for
8157 fixed-length vectors. */
8158 unsigned int count = nunits.to_constant ();
8159 vec_perm_builder sel (count, count, 1);
8160 for (i = 0; i < (unsigned int) count; ++i)
8161 sel.quick_push (i | (count / 2));
8163 vec_perm_indices indices (sel, 2, count);
8164 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
8165 gcc_assert (perm_mask != NULL_TREE);
8166 ncopies *= 2;
8168 if (mask)
8169 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
8171 else
8172 gcc_unreachable ();
8174 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
8175 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8176 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8177 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8178 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8179 scaletype = TREE_VALUE (arglist);
8181 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
8182 && TREE_CODE (rettype) == VOID_TYPE);
8184 ptr = fold_convert (ptrtype, gs_info.base);
8185 if (!is_gimple_min_invariant (ptr))
8187 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
8188 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
8189 gcc_assert (!new_bb);
8192 if (mask == NULL_TREE)
8194 mask_arg = build_int_cst (masktype, -1);
8195 mask_arg = vect_init_vector (vinfo, stmt_info,
8196 mask_arg, masktype, NULL);
8199 scale = build_int_cst (scaletype, gs_info.scale);
8201 auto_vec<tree> vec_oprnds0;
8202 auto_vec<tree> vec_oprnds1;
8203 auto_vec<tree> vec_masks;
8204 if (mask)
8206 tree mask_vectype = truth_type_for (vectype);
8207 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8208 modifier == NARROW
8209 ? ncopies / 2 : ncopies,
8210 mask, &vec_masks, mask_vectype);
8212 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8213 modifier == WIDEN
8214 ? ncopies / 2 : ncopies,
8215 gs_info.offset, &vec_oprnds0);
8216 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8217 modifier == NARROW
8218 ? ncopies / 2 : ncopies,
8219 op, &vec_oprnds1);
8220 for (j = 0; j < ncopies; ++j)
8222 if (modifier == WIDEN)
8224 if (j & 1)
8225 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
8226 perm_mask, stmt_info, gsi);
8227 else
8228 op = vec_oprnd0 = vec_oprnds0[j / 2];
8229 src = vec_oprnd1 = vec_oprnds1[j];
8230 if (mask)
8231 mask_op = vec_mask = vec_masks[j];
8233 else if (modifier == NARROW)
8235 if (j & 1)
8236 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
8237 perm_mask, stmt_info, gsi);
8238 else
8239 src = vec_oprnd1 = vec_oprnds1[j / 2];
8240 op = vec_oprnd0 = vec_oprnds0[j];
8241 if (mask)
8242 mask_op = vec_mask = vec_masks[j / 2];
8244 else
8246 op = vec_oprnd0 = vec_oprnds0[j];
8247 src = vec_oprnd1 = vec_oprnds1[j];
8248 if (mask)
8249 mask_op = vec_mask = vec_masks[j];
8252 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
8254 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
8255 TYPE_VECTOR_SUBPARTS (srctype)));
8256 var = vect_get_new_ssa_name (srctype, vect_simple_var);
8257 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
8258 gassign *new_stmt
8259 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
8260 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8261 src = var;
8264 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
8266 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
8267 TYPE_VECTOR_SUBPARTS (idxtype)));
8268 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
8269 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
8270 gassign *new_stmt
8271 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
8272 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8273 op = var;
8276 if (mask)
8278 tree utype;
8279 mask_arg = mask_op;
8280 if (modifier == NARROW)
8282 var = vect_get_new_ssa_name (mask_halfvectype,
8283 vect_simple_var);
8284 gassign *new_stmt
8285 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
8286 : VEC_UNPACK_LO_EXPR,
8287 mask_op);
8288 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8289 mask_arg = var;
8291 tree optype = TREE_TYPE (mask_arg);
8292 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
8293 utype = masktype;
8294 else
8295 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
8296 var = vect_get_new_ssa_name (utype, vect_scalar_var);
8297 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
8298 gassign *new_stmt
8299 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
8300 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8301 mask_arg = var;
8302 if (!useless_type_conversion_p (masktype, utype))
8304 gcc_assert (TYPE_PRECISION (utype)
8305 <= TYPE_PRECISION (masktype));
8306 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
8307 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
8308 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8309 mask_arg = var;
8313 gcall *new_stmt
8314 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
8315 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8317 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8319 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8320 return true;
8322 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8323 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8325 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8326 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
8328 if (grouped_store)
8330 /* FORNOW */
8331 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8333 /* We vectorize all the stmts of the interleaving group when we
8334 reach the last stmt in the group. */
8335 if (DR_GROUP_STORE_COUNT (first_stmt_info)
8336 < DR_GROUP_SIZE (first_stmt_info)
8337 && !slp)
8339 *vec_stmt = NULL;
8340 return true;
8343 if (slp)
8345 grouped_store = false;
8346 /* VEC_NUM is the number of vect stmts to be created for this
8347 group. */
8348 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8349 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8350 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8351 == first_stmt_info);
8352 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8353 op = vect_get_store_rhs (first_stmt_info);
8355 else
8356 /* VEC_NUM is the number of vect stmts to be created for this
8357 group. */
8358 vec_num = group_size;
8360 ref_type = get_group_alias_ptr_type (first_stmt_info);
8362 else
8363 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8365 if (dump_enabled_p ())
8366 dump_printf_loc (MSG_NOTE, vect_location,
8367 "transform store. ncopies = %d\n", ncopies);
8369 if (memory_access_type == VMAT_ELEMENTWISE
8370 || memory_access_type == VMAT_STRIDED_SLP)
8372 gimple_stmt_iterator incr_gsi;
8373 bool insert_after;
8374 gimple *incr;
8375 tree offvar;
8376 tree ivstep;
8377 tree running_off;
8378 tree stride_base, stride_step, alias_off;
8379 tree vec_oprnd;
8380 tree dr_offset;
8381 unsigned int g;
8382 /* Checked by get_load_store_type. */
8383 unsigned int const_nunits = nunits.to_constant ();
8385 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8386 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8388 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8389 stride_base
8390 = fold_build_pointer_plus
8391 (DR_BASE_ADDRESS (first_dr_info->dr),
8392 size_binop (PLUS_EXPR,
8393 convert_to_ptrofftype (dr_offset),
8394 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8395 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8397 /* For a store with loop-invariant (but other than power-of-2)
8398 stride (i.e. not a grouped access) like so:
8400 for (i = 0; i < n; i += stride)
8401 array[i] = ...;
8403 we generate a new induction variable and new stores from
8404 the components of the (vectorized) rhs:
8406 for (j = 0; ; j += VF*stride)
8407 vectemp = ...;
8408 tmp1 = vectemp[0];
8409 array[j] = tmp1;
8410 tmp2 = vectemp[1];
8411 array[j + stride] = tmp2;
8415 unsigned nstores = const_nunits;
8416 unsigned lnel = 1;
8417 tree ltype = elem_type;
8418 tree lvectype = vectype;
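      /* Editor's note: NSTORES is the number of scalar or sub-vector stores
         emitted per vector copy, LNEL the number of group elements each such
         store covers, LTYPE the type actually stored and LVECTYPE the type
         the vector is punned to before extracting.  The SLP code below
         prefers storing whole sub-vectors (or equally sized integers) over
         per-element stores when the target supports the needed
         extraction.  */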
8419 if (slp)
8421 if (group_size < const_nunits
8422 && const_nunits % group_size == 0)
8424 nstores = const_nunits / group_size;
8425 lnel = group_size;
8426 ltype = build_vector_type (elem_type, group_size);
8427 lvectype = vectype;
8429 /* First check whether the vec_extract optab doesn't support extracting
8430 the vector elts directly. */
8431 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8432 machine_mode vmode;
8433 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8434 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8435 group_size).exists (&vmode)
8436 || (convert_optab_handler (vec_extract_optab,
8437 TYPE_MODE (vectype), vmode)
8438 == CODE_FOR_nothing))
8440 /* Try to avoid emitting an extract of vector elements
8441 by performing the extracts using an integer type of the
8442 same size, extracting from a vector of those and then
8443 re-interpreting it as the original vector type if
8444 supported. */
8445 unsigned lsize
8446 = group_size * GET_MODE_BITSIZE (elmode);
8447 unsigned int lnunits = const_nunits / group_size;
8448 /* If we can't construct such a vector fall back to
8449 element extracts from the original vector type and
8450 element size stores. */
8451 if (int_mode_for_size (lsize, 0).exists (&elmode)
8452 && VECTOR_MODE_P (TYPE_MODE (vectype))
8453 && related_vector_mode (TYPE_MODE (vectype), elmode,
8454 lnunits).exists (&vmode)
8455 && (convert_optab_handler (vec_extract_optab,
8456 vmode, elmode)
8457 != CODE_FOR_nothing))
8459 nstores = lnunits;
8460 lnel = group_size;
8461 ltype = build_nonstandard_integer_type (lsize, 1);
8462 lvectype = build_vector_type (ltype, nstores);
8464 /* Else fall back to vector extraction anyway.
8465 Fewer stores are more important than avoiding spilling
8466 of the vector we extract from. Compared to the
8467 construction case in vectorizable_load no store-forwarding
8468 issue exists here for reasonable archs. */
8471 else if (group_size >= const_nunits
8472 && group_size % const_nunits == 0)
8474 nstores = 1;
8475 lnel = const_nunits;
8476 ltype = vectype;
8477 lvectype = vectype;
8479 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8480 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8483 ivstep = stride_step;
8484 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8485 build_int_cst (TREE_TYPE (ivstep), vf));
8487 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8489 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8490 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8491 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8492 loop, &incr_gsi, insert_after,
8493 &offvar, NULL);
8494 incr = gsi_stmt (incr_gsi);
8496 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8498 alias_off = build_int_cst (ref_type, 0);
8499 stmt_vec_info next_stmt_info = first_stmt_info;
8500 for (g = 0; g < group_size; g++)
8502 running_off = offvar;
8503 if (g)
8505 tree size = TYPE_SIZE_UNIT (ltype);
8506 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8507 size);
8508 tree newoff = copy_ssa_name (running_off, NULL);
8509 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8510 running_off, pos);
8511 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8512 running_off = newoff;
8514 if (!slp)
8515 op = vect_get_store_rhs (next_stmt_info);
8516 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8517 op, &vec_oprnds);
8518 unsigned int group_el = 0;
8519 unsigned HOST_WIDE_INT
8520 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8521 for (j = 0; j < ncopies; j++)
8523 vec_oprnd = vec_oprnds[j];
8524 /* Pun the vector to extract from if necessary. */
8525 if (lvectype != vectype)
8527 tree tem = make_ssa_name (lvectype);
8528 gimple *pun
8529 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8530 lvectype, vec_oprnd));
8531 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8532 vec_oprnd = tem;
8534 for (i = 0; i < nstores; i++)
8536 tree newref, newoff;
8537 gimple *incr, *assign;
8538 tree size = TYPE_SIZE (ltype);
8539 /* Extract the i'th component. */
8540 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8541 bitsize_int (i), size);
8542 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8543 size, pos);
8545 elem = force_gimple_operand_gsi (gsi, elem, true,
8546 NULL_TREE, true,
8547 GSI_SAME_STMT);
8549 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8550 group_el * elsz);
8551 newref = build2 (MEM_REF, ltype,
8552 running_off, this_off);
8553 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8555 /* And store it to *running_off. */
8556 assign = gimple_build_assign (newref, elem);
8557 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8559 group_el += lnel;
8560 if (! slp
8561 || group_el == group_size)
8563 newoff = copy_ssa_name (running_off, NULL);
8564 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8565 running_off, stride_step);
8566 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8568 running_off = newoff;
8569 group_el = 0;
8571 if (g == group_size - 1
8572 && !slp)
8574 if (j == 0 && i == 0)
8575 *vec_stmt = assign;
8576 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8580 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8581 vec_oprnds.release ();
8582 if (slp)
8583 break;
8586 return true;
8589 auto_vec<tree> dr_chain (group_size);
8590 oprnds.create (group_size);
8592 gcc_assert (alignment_support_scheme);
8593 vec_loop_masks *loop_masks
8594 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8595 ? &LOOP_VINFO_MASKS (loop_vinfo)
8596 : NULL);
8597 vec_loop_lens *loop_lens
8598 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8599 ? &LOOP_VINFO_LENS (loop_vinfo)
8600 : NULL);
8602 /* Shouldn't go with length-based approach if fully masked. */
8603 gcc_assert (!loop_lens || !loop_masks);
8605 /* Targets with store-lane instructions must not require explicit
8606 realignment. vect_supportable_dr_alignment always returns either
8607 dr_aligned or dr_unaligned_supported for masked operations. */
8608 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8609 && !mask
8610 && !loop_masks)
8611 || alignment_support_scheme == dr_aligned
8612 || alignment_support_scheme == dr_unaligned_supported);
8614 tree offset = NULL_TREE;
8615 if (!known_eq (poffset, 0))
8616 offset = size_int (poffset);
8618 tree bump;
8619 tree vec_offset = NULL_TREE;
8620 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8622 aggr_type = NULL_TREE;
8623 bump = NULL_TREE;
8625 else if (memory_access_type == VMAT_GATHER_SCATTER)
8627 aggr_type = elem_type;
8628 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8629 &bump, &vec_offset, loop_lens);
8631 else
8633 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8634 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8635 else
8636 aggr_type = vectype;
8637 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8638 memory_access_type, loop_lens);
8641 if (mask)
8642 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8644 /* In case the vectorization factor (VF) is bigger than the number
8645 of elements that we can fit in a vectype (nunits), we have to generate
8646 more than one vector stmt, i.e. we need to "unroll" the
8647 vector stmt by a factor of VF/nunits. */
8649 /* In case of interleaving (non-unit grouped access):
8651 S1: &base + 2 = x2
8652 S2: &base = x0
8653 S3: &base + 1 = x1
8654 S4: &base + 3 = x3
8656 We create vectorized stores starting from base address (the access of the
8657 first stmt in the chain (S2 in the above example), when the last store stmt
8658 of the chain (S4) is reached:
8660 VS1: &base = vx2
8661 VS2: &base + vec_size*1 = vx0
8662 VS3: &base + vec_size*2 = vx1
8663 VS4: &base + vec_size*3 = vx3
8665 Then permutation statements are generated:
8667 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8668 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8671 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8672 (the order of the data-refs in the output of vect_permute_store_chain
8673 corresponds to the order of scalar stmts in the interleaving chain - see
8674 the documentation of vect_permute_store_chain()).
8676 In case of both multiple types and interleaving, the above vector stores and
8677 permutation stmts are created for every copy. The result vector stmts are
8678 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8679 STMT_VINFO_RELATED_STMT for the next copies.
8682 auto_vec<tree> vec_masks;
8683 tree vec_mask = NULL;
8684 auto_vec<tree> vec_offsets;
8685 auto_vec<vec<tree> > gvec_oprnds;
8686 gvec_oprnds.safe_grow_cleared (group_size, true);
8687 for (j = 0; j < ncopies; j++)
8689 gimple *new_stmt;
8690 if (j == 0)
8692 if (slp)
8694 /* Get vectorized arguments for SLP_NODE. */
8695 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8696 op, &vec_oprnds);
8697 vec_oprnd = vec_oprnds[0];
8699 else
8701 /* For interleaved stores we collect vectorized defs for all the
8702 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8703 used as an input to vect_permute_store_chain().
8705 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8706 and OPRNDS are of size 1. */
8707 stmt_vec_info next_stmt_info = first_stmt_info;
8708 for (i = 0; i < group_size; i++)
8710 /* Since gaps are not supported for interleaved stores,
8711 DR_GROUP_SIZE is the exact number of stmts in the chain.
8712 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8713 is no interleaving, DR_GROUP_SIZE is 1,
8714 and only one iteration of the loop will be executed. */
8715 op = vect_get_store_rhs (next_stmt_info);
8716 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8717 ncopies, op, &gvec_oprnds[i]);
8718 vec_oprnd = gvec_oprnds[i][0];
8719 dr_chain.quick_push (gvec_oprnds[i][0]);
8720 oprnds.quick_push (gvec_oprnds[i][0]);
8721 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8723 if (mask)
8725 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8726 mask, &vec_masks, mask_vectype);
8727 vec_mask = vec_masks[0];
8731 /* We should have caught mismatched types earlier. */
8732 gcc_assert (useless_type_conversion_p (vectype,
8733 TREE_TYPE (vec_oprnd)));
8734 bool simd_lane_access_p
8735 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8736 if (simd_lane_access_p
8737 && !loop_masks
8738 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8739 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8740 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8741 && integer_zerop (DR_INIT (first_dr_info->dr))
8742 && alias_sets_conflict_p (get_alias_set (aggr_type),
8743 get_alias_set (TREE_TYPE (ref_type))))
8745 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8746 dataref_offset = build_int_cst (ref_type, 0);
8748 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8749 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8750 slp_node, &gs_info, &dataref_ptr,
8751 &vec_offsets);
8752 else
8753 dataref_ptr
8754 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8755 simd_lane_access_p ? loop : NULL,
8756 offset, &dummy, gsi, &ptr_incr,
8757 simd_lane_access_p, bump);
8759 else
8761 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8762 /* For interleaved stores we created vectorized defs for all the
8763 defs stored in OPRNDS in the previous iteration (previous copy).
8764 DR_CHAIN is then used as an input to vect_permute_store_chain().
8765 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8766 OPRNDS are of size 1. */
8767 for (i = 0; i < group_size; i++)
8769 vec_oprnd = gvec_oprnds[i][j];
8770 dr_chain[i] = gvec_oprnds[i][j];
8771 oprnds[i] = gvec_oprnds[i][j];
8773 if (mask)
8774 vec_mask = vec_masks[j];
8775 if (dataref_offset)
8776 dataref_offset
8777 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8778 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8779 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8780 stmt_info, bump);
8783 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8785 tree vec_array;
8787 /* Get an array into which we can store the individual vectors. */
8788 vec_array = create_vector_array (vectype, vec_num);
8790 /* Invalidate the current contents of VEC_ARRAY. This should
8791 become an RTL clobber too, which prevents the vector registers
8792 from being upward-exposed. */
8793 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8795 /* Store the individual vectors into the array. */
8796 for (i = 0; i < vec_num; i++)
8798 vec_oprnd = dr_chain[i];
8799 write_vector_array (vinfo, stmt_info,
8800 gsi, vec_oprnd, vec_array, i);
8803 tree final_mask = NULL;
8804 if (loop_masks)
8805 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8806 ncopies, vectype, j);
8807 if (vec_mask)
8808 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8809 final_mask, vec_mask, gsi);
8811 gcall *call;
8812 if (final_mask)
8814 /* Emit:
8815 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8816 VEC_ARRAY). */
8817 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8818 tree alias_ptr = build_int_cst (ref_type, align);
8819 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8820 dataref_ptr, alias_ptr,
8821 final_mask, vec_array);
8823 else
8825 /* Emit:
8826 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8827 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8828 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8829 vec_array);
8830 gimple_call_set_lhs (call, data_ref);
8832 gimple_call_set_nothrow (call, true);
8833 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8834 new_stmt = call;
8836 /* Record that VEC_ARRAY is now dead. */
8837 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
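/* Putting the pieces above together, for a two-vector group the emitted
   sequence looks roughly like this (names invented, purely illustrative):
     vec_array = {CLOBBER};
     vec_array[0] = vx0;
     vec_array[1] = vx1;
     .MASK_STORE_LANES (dataref_ptr, align, mask, vec_array);
     vec_array = {CLOBBER};
   with MEM <array> = .STORE_LANES (vec_array) replacing the masked call
   when no mask is needed. */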
8839 else
8841 new_stmt = NULL;
8842 if (grouped_store)
8844 if (j == 0)
8845 result_chain.create (group_size);
8846 /* Permute. */
8847 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8848 gsi, &result_chain);
8851 stmt_vec_info next_stmt_info = first_stmt_info;
8852 for (i = 0; i < vec_num; i++)
8854 unsigned misalign;
8855 unsigned HOST_WIDE_INT align;
8857 tree final_mask = NULL_TREE;
8858 tree final_len = NULL_TREE;
8859 tree bias = NULL_TREE;
8860 if (loop_masks)
8861 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8862 vec_num * ncopies,
8863 vectype, vec_num * j + i);
8864 if (vec_mask)
8865 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8866 final_mask, vec_mask, gsi);
8868 if (memory_access_type == VMAT_GATHER_SCATTER
8869 && gs_info.ifn != IFN_LAST)
8871 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8872 vec_offset = vec_offsets[vec_num * j + i];
8873 tree scale = size_int (gs_info.scale);
8875 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
8877 if (loop_lens)
8878 final_len
8879 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8880 vec_num * ncopies, vectype,
8881 vec_num * j + i, 1);
8882 else
8883 final_len
8884 = build_int_cst (sizetype,
8885 TYPE_VECTOR_SUBPARTS (vectype));
8886 signed char biasval
8887 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8888 bias = build_int_cst (intQI_type_node, biasval);
8889 if (!final_mask)
8891 mask_vectype = truth_type_for (vectype);
8892 final_mask = build_minus_one_cst (mask_vectype);
8896 gcall *call;
8897 if (final_len && final_mask)
8898 call
8899 = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
8900 7, dataref_ptr, vec_offset,
8901 scale, vec_oprnd, final_mask,
8902 final_len, bias);
8903 else if (final_mask)
8904 call = gimple_build_call_internal
8905 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8906 scale, vec_oprnd, final_mask);
8907 else
8908 call = gimple_build_call_internal
8909 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8910 scale, vec_oprnd);
8911 gimple_call_set_nothrow (call, true);
8912 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8913 new_stmt = call;
8914 break;
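/* In GIMPLE dumps the scatter calls built above appear roughly as
   (operand names invented):
     .MASK_LEN_SCATTER_STORE (ptr, offsets, scale, data, mask, len, bias);
     .MASK_SCATTER_STORE (ptr, offsets, scale, data, mask);
     .SCATTER_STORE (ptr, offsets, scale, data);  */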
8916 else if (memory_access_type == VMAT_GATHER_SCATTER)
8918 /* Emulated scatter. */
8919 gcc_assert (!final_mask);
8920 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
8921 unsigned HOST_WIDE_INT const_offset_nunits
8922 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
8923 .to_constant ();
8924 vec<constructor_elt, va_gc> *ctor_elts;
8925 vec_alloc (ctor_elts, const_nunits);
8926 gimple_seq stmts = NULL;
8927 tree elt_type = TREE_TYPE (vectype);
8928 unsigned HOST_WIDE_INT elt_size
8929 = tree_to_uhwi (TYPE_SIZE (elt_type));
8930 /* We support offset vectors with more elements
8931 than the data vector for now. */
8932 unsigned HOST_WIDE_INT factor
8933 = const_offset_nunits / const_nunits;
8934 vec_offset = vec_offsets[j / factor];
8935 unsigned elt_offset = (j % factor) * const_nunits;
8936 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
8937 tree scale = size_int (gs_info.scale);
8938 align = get_object_alignment (DR_REF (first_dr_info->dr));
8939 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
8940 for (unsigned k = 0; k < const_nunits; ++k)
8942 /* Compute the offsetted pointer. */
8943 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
8944 bitsize_int (k + elt_offset));
8945 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
8946 idx_type, vec_offset,
8947 TYPE_SIZE (idx_type), boff);
8948 idx = gimple_convert (&stmts, sizetype, idx);
8949 idx = gimple_build (&stmts, MULT_EXPR,
8950 sizetype, idx, scale);
8951 tree ptr = gimple_build (&stmts, PLUS_EXPR,
8952 TREE_TYPE (dataref_ptr),
8953 dataref_ptr, idx);
8954 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
8955 /* Extract the element to be stored. */
8956 tree elt = gimple_build (&stmts, BIT_FIELD_REF,
8957 TREE_TYPE (vectype), vec_oprnd,
8958 TYPE_SIZE (elt_type),
8959 bitsize_int (k * elt_size));
8960 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8961 stmts = NULL;
8962 tree ref = build2 (MEM_REF, ltype, ptr,
8963 build_int_cst (ref_type, 0));
8964 new_stmt = gimple_build_assign (ref, elt);
8965 vect_finish_stmt_generation (vinfo, stmt_info,
8966 new_stmt, gsi);
8968 break;
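/* Sketch of what the emulated scatter above expands to for one lane K
   (illustrative only, names invented):
     idx = BIT_FIELD_REF <offsets, isize, K * isize>;
     idx = (sizetype) idx * scale;
     ptr = dataref_ptr + idx;
     elt = BIT_FIELD_REF <data, esize, K * esize>;
     MEM[(elt_type *) ptr] = elt;
   i.e. one scalar store per vector lane, with the lane's offset scaled
   and added to the common base pointer. */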
8971 if (i > 0)
8972 /* Bump the vector pointer. */
8973 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8974 gsi, stmt_info, bump);
8976 if (slp)
8977 vec_oprnd = vec_oprnds[i];
8978 else if (grouped_store)
8979 /* For grouped stores vectorized defs are interleaved in
8980 vect_permute_store_chain(). */
8981 vec_oprnd = result_chain[i];
8983 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8984 if (alignment_support_scheme == dr_aligned)
8985 misalign = 0;
8986 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8988 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8989 misalign = 0;
8991 else
8992 misalign = misalignment;
8993 if (dataref_offset == NULL_TREE
8994 && TREE_CODE (dataref_ptr) == SSA_NAME)
8995 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8996 misalign);
8997 align = least_bit_hwi (misalign | align);
8999 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9001 tree perm_mask = perm_mask_for_reverse (vectype);
9002 tree perm_dest = vect_create_destination_var
9003 (vect_get_store_rhs (stmt_info), vectype);
9004 tree new_temp = make_ssa_name (perm_dest);
9006 /* Generate the permute statement. */
9007 gimple *perm_stmt
9008 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9009 vec_oprnd, perm_mask);
9010 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9012 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9013 vec_oprnd = new_temp;
9016 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
9017 machine_mode vmode = TYPE_MODE (vectype);
9018 machine_mode new_vmode = vmode;
9019 internal_fn partial_ifn = IFN_LAST;
9020 if (loop_lens)
9022 opt_machine_mode new_ovmode
9023 = get_len_load_store_mode (vmode, false, &partial_ifn);
9024 new_vmode = new_ovmode.require ();
9025 unsigned factor
9026 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9027 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9028 vec_num * ncopies, vectype,
9029 vec_num * j + i, factor);
9031 else if (final_mask)
9033 if (!can_vec_mask_load_store_p (vmode,
9034 TYPE_MODE (TREE_TYPE (final_mask)),
9035 false, &partial_ifn))
9036 gcc_unreachable ();
9039 if (partial_ifn == IFN_MASK_LEN_STORE)
9041 if (!final_len)
9043 /* Pass VF value to 'len' argument of
9044 MASK_LEN_STORE if LOOP_LENS is invalid. */
9045 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
9046 final_len
9047 = build_int_cst (iv_type,
9048 TYPE_VECTOR_SUBPARTS (vectype));
9050 if (!final_mask)
9052 /* Pass all ones value to 'mask' argument of
9053 MASK_LEN_STORE if final_mask is invalid. */
9054 mask_vectype = truth_type_for (vectype);
9055 final_mask = build_minus_one_cst (mask_vectype);
9058 if (final_len)
9060 signed char biasval
9061 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9063 bias = build_int_cst (intQI_type_node, biasval);
9066 /* Arguments are ready. Create the new vector stmt. */
9067 if (final_len)
9069 gcall *call;
9070 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9071 /* Need conversion if it's wrapped with VnQI. */
9072 if (vmode != new_vmode)
9074 tree new_vtype
9075 = build_vector_type_for_mode (unsigned_intQI_type_node,
9076 new_vmode);
9077 tree var
9078 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9079 vec_oprnd
9080 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9081 gassign *new_stmt
9082 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
9083 vec_oprnd);
9084 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
9085 gsi);
9086 vec_oprnd = var;
9089 if (partial_ifn == IFN_MASK_LEN_STORE)
9090 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9091 dataref_ptr, ptr,
9092 final_mask, final_len,
9093 bias, vec_oprnd);
9094 else
9095 call
9096 = gimple_build_call_internal (IFN_LEN_STORE, 5,
9097 dataref_ptr, ptr,
9098 final_len, bias,
9099 vec_oprnd);
9100 gimple_call_set_nothrow (call, true);
9101 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9102 new_stmt = call;
9104 else if (final_mask)
9106 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9107 gcall *call
9108 = gimple_build_call_internal (IFN_MASK_STORE, 4,
9109 dataref_ptr, ptr,
9110 final_mask, vec_oprnd);
9111 gimple_call_set_nothrow (call, true);
9112 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9113 new_stmt = call;
9115 else
9117 data_ref = fold_build2 (MEM_REF, vectype,
9118 dataref_ptr,
9119 dataref_offset
9120 ? dataref_offset
9121 : build_int_cst (ref_type, 0));
9122 if (alignment_support_scheme == dr_aligned)
9124 else
9125 TREE_TYPE (data_ref)
9126 = build_aligned_type (TREE_TYPE (data_ref),
9127 align * BITS_PER_UNIT);
9128 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9129 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9130 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
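/* For reference, the stores built in the branches above appear in GIMPLE
   dumps roughly as (operand names invented; the second argument of the
   calls encodes the alignment in bits):
     .MASK_LEN_STORE (dataref_ptr, align, mask, len, bias, vec);
     .LEN_STORE (dataref_ptr, align, len, bias, vec);
     .MASK_STORE (dataref_ptr, align, mask, vec);
   or, in the unconditional case, a plain "MEM[...] = vec" assignment. */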
9133 if (slp)
9134 continue;
9136 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9137 if (!next_stmt_info)
9138 break;
9141 if (!slp)
9143 if (j == 0)
9144 *vec_stmt = new_stmt;
9145 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9149 for (i = 0; i < group_size; ++i)
9151 vec<tree> oprndsi = gvec_oprnds[i];
9152 oprndsi.release ();
9154 oprnds.release ();
9155 result_chain.release ();
9156 vec_oprnds.release ();
9158 return true;
9161 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9162 VECTOR_CST mask. No checks are made that the target platform supports the
9163 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9164 vect_gen_perm_mask_checked. */
9166 tree
9167 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9169 tree mask_type;
9171 poly_uint64 nunits = sel.length ();
9172 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9174 mask_type = build_vector_type (ssizetype, nunits);
9175 return vec_perm_indices_to_tree (mask_type, sel);
9178 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9179 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9181 tree
9182 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9184 machine_mode vmode = TYPE_MODE (vectype);
9185 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9186 return vect_gen_perm_mask_any (vectype, sel);
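/* Usage sketch, modelled on perm_mask_for_reverse in this file (only
   illustrative; MASK is assumed to be a caller-provided tree):
     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
                               indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);
   which yields a VECTOR_CST mask reversing the elements of VECTYPE. */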
9189 /* Given vector variables X and Y that were generated for the scalar
9190 STMT_INFO, generate instructions to permute the vector elements of X and Y
9191 using permutation mask MASK_VEC, insert them at *GSI and return the
9192 permuted vector variable. */
9194 static tree
9195 permute_vec_elements (vec_info *vinfo,
9196 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9197 gimple_stmt_iterator *gsi)
9199 tree vectype = TREE_TYPE (x);
9200 tree perm_dest, data_ref;
9201 gimple *perm_stmt;
9203 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9204 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9205 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9206 else
9207 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9208 data_ref = make_ssa_name (perm_dest);
9210 /* Generate the permute statement. */
9211 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9212 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9214 return data_ref;
9217 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9218 inserting them on the loop's preheader edge. Returns true if we
9219 were successful in doing so (and thus STMT_INFO can then be moved),
9220 otherwise returns false. HOIST_P indicates whether we want to hoist the
9221 definitions of all SSA uses; it is false when we are only costing. */
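/* Illustrative example (invented SSA names): for a loop-invariant load
   such as
     _1 = (sizetype) n_7(D);
     x_2 = a[_1];
   where only the definition of _1 sits inside the loop and its own operand
   n_7(D) is defined outside of it, the hoisting pass below moves
   "_1 = (sizetype) n_7(D);" to the loop preheader so that the load itself
   can then be treated as invariant. */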
9223 static bool
9224 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9226 ssa_op_iter i;
9227 tree op;
9228 bool any = false;
9230 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9232 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9233 if (!gimple_nop_p (def_stmt)
9234 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9236 /* Make sure we don't need to recurse. While we could do
9237 so in simple cases, when there are more complex use webs
9238 we don't have an easy way to preserve stmt order to fulfil
9239 dependencies within them. */
9240 tree op2;
9241 ssa_op_iter i2;
9242 if (gimple_code (def_stmt) == GIMPLE_PHI)
9243 return false;
9244 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9246 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9247 if (!gimple_nop_p (def_stmt2)
9248 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9249 return false;
9251 any = true;
9255 if (!any)
9256 return true;
9258 if (!hoist_p)
9259 return true;
9261 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9263 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9264 if (!gimple_nop_p (def_stmt)
9265 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9267 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9268 gsi_remove (&gsi, false);
9269 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9273 return true;
9276 /* vectorizable_load.
9278 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9279 that can be vectorized.
9280 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9281 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9282 Return true if STMT_INFO is vectorizable in this way. */
9284 static bool
9285 vectorizable_load (vec_info *vinfo,
9286 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9287 gimple **vec_stmt, slp_tree slp_node,
9288 stmt_vector_for_cost *cost_vec)
9290 tree scalar_dest;
9291 tree vec_dest = NULL;
9292 tree data_ref = NULL;
9293 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9294 class loop *loop = NULL;
9295 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9296 bool nested_in_vect_loop = false;
9297 tree elem_type;
9298 /* Avoid false positive uninitialized warning, see PR110652. */
9299 tree new_temp = NULL_TREE;
9300 machine_mode mode;
9301 tree dummy;
9302 tree dataref_ptr = NULL_TREE;
9303 tree dataref_offset = NULL_TREE;
9304 gimple *ptr_incr = NULL;
9305 int ncopies;
9306 int i, j;
9307 unsigned int group_size;
9308 poly_uint64 group_gap_adj;
9309 tree msq = NULL_TREE, lsq;
9310 tree realignment_token = NULL_TREE;
9311 gphi *phi = NULL;
9312 vec<tree> dr_chain = vNULL;
9313 bool grouped_load = false;
9314 stmt_vec_info first_stmt_info;
9315 stmt_vec_info first_stmt_info_for_drptr = NULL;
9316 bool compute_in_loop = false;
9317 class loop *at_loop;
9318 int vec_num;
9319 bool slp = (slp_node != NULL);
9320 bool slp_perm = false;
9321 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9322 poly_uint64 vf;
9323 tree aggr_type;
9324 gather_scatter_info gs_info;
9325 tree ref_type;
9326 enum vect_def_type mask_dt = vect_unknown_def_type;
9328 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9329 return false;
9331 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9332 && ! vec_stmt)
9333 return false;
9335 if (!STMT_VINFO_DATA_REF (stmt_info))
9336 return false;
9338 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9339 int mask_index = -1;
9340 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9342 scalar_dest = gimple_assign_lhs (assign);
9343 if (TREE_CODE (scalar_dest) != SSA_NAME)
9344 return false;
9346 tree_code code = gimple_assign_rhs_code (assign);
9347 if (code != ARRAY_REF
9348 && code != BIT_FIELD_REF
9349 && code != INDIRECT_REF
9350 && code != COMPONENT_REF
9351 && code != IMAGPART_EXPR
9352 && code != REALPART_EXPR
9353 && code != MEM_REF
9354 && TREE_CODE_CLASS (code) != tcc_declaration)
9355 return false;
9357 else
9359 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9360 if (!call || !gimple_call_internal_p (call))
9361 return false;
9363 internal_fn ifn = gimple_call_internal_fn (call);
9364 if (!internal_load_fn_p (ifn))
9365 return false;
9367 scalar_dest = gimple_call_lhs (call);
9368 if (!scalar_dest)
9369 return false;
9371 mask_index = internal_fn_mask_index (ifn);
9372 /* ??? For SLP the mask operand is always last. */
9373 if (mask_index >= 0 && slp_node)
9374 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
9375 if (mask_index >= 0
9376 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9377 &mask, NULL, &mask_dt, &mask_vectype))
9378 return false;
9381 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9382 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9384 if (loop_vinfo)
9386 loop = LOOP_VINFO_LOOP (loop_vinfo);
9387 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9388 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9390 else
9391 vf = 1;
9393 /* Multiple types in SLP are handled by creating the appropriate number of
9394 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9395 case of SLP. */
9396 if (slp)
9397 ncopies = 1;
9398 else
9399 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9401 gcc_assert (ncopies >= 1);
9403 /* FORNOW. This restriction should be relaxed. */
9404 if (nested_in_vect_loop && ncopies > 1)
9406 if (dump_enabled_p ())
9407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9408 "multiple types in nested loop.\n");
9409 return false;
9412 /* Invalidate assumptions made by dependence analysis when vectorization
9413 on the unrolled body effectively re-orders stmts. */
9414 if (ncopies > 1
9415 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9416 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9417 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9419 if (dump_enabled_p ())
9420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9421 "cannot perform implicit CSE when unrolling "
9422 "with negative dependence distance\n");
9423 return false;
9426 elem_type = TREE_TYPE (vectype);
9427 mode = TYPE_MODE (vectype);
9429 /* FORNOW. In some cases we can vectorize even if the data type is not
9430 supported (e.g. data copies). */
9431 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9433 if (dump_enabled_p ())
9434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9435 "Aligned load, but unsupported type.\n");
9436 return false;
9439 /* Check if the load is a part of an interleaving chain. */
9440 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9442 grouped_load = true;
9443 /* FORNOW */
9444 gcc_assert (!nested_in_vect_loop);
9445 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9447 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9448 group_size = DR_GROUP_SIZE (first_stmt_info);
9450 /* Refuse non-SLP vectorization of SLP-only groups. */
9451 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9453 if (dump_enabled_p ())
9454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9455 "cannot vectorize load in non-SLP mode.\n");
9456 return false;
9459 /* Invalidate assumptions made by dependence analysis when vectorization
9460 on the unrolled body effectively re-orders stmts. */
9461 if (!PURE_SLP_STMT (stmt_info)
9462 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9463 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9464 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9466 if (dump_enabled_p ())
9467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9468 "cannot perform implicit CSE when performing "
9469 "group loads with negative dependence distance\n");
9470 return false;
9473 else
9474 group_size = 1;
9476 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9478 slp_perm = true;
9480 if (!loop_vinfo)
9482 /* In BB vectorization we may not actually use a loaded vector
9483 accessing elements in excess of DR_GROUP_SIZE. */
9484 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9485 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9486 unsigned HOST_WIDE_INT nunits;
9487 unsigned j, k, maxk = 0;
9488 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9489 if (k > maxk)
9490 maxk = k;
9491 tree vectype = SLP_TREE_VECTYPE (slp_node);
9492 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9493 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9495 if (dump_enabled_p ())
9496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9497 "BB vectorization with gaps at the end of "
9498 "a load is not supported\n");
9499 return false;
9503 auto_vec<tree> tem;
9504 unsigned n_perms;
9505 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9506 true, &n_perms))
9508 if (dump_enabled_p ())
9509 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9510 vect_location,
9511 "unsupported load permutation\n");
9512 return false;
9516 vect_memory_access_type memory_access_type;
9517 enum dr_alignment_support alignment_support_scheme;
9518 int misalignment;
9519 poly_int64 poffset;
9520 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9521 ncopies, &memory_access_type, &poffset,
9522 &alignment_support_scheme, &misalignment, &gs_info))
9523 return false;
9525 if (mask)
9527 if (memory_access_type == VMAT_CONTIGUOUS)
9529 machine_mode vec_mode = TYPE_MODE (vectype);
9530 if (!VECTOR_MODE_P (vec_mode)
9531 || !can_vec_mask_load_store_p (vec_mode,
9532 TYPE_MODE (mask_vectype), true))
9533 return false;
9535 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9536 && memory_access_type != VMAT_GATHER_SCATTER)
9538 if (dump_enabled_p ())
9539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9540 "unsupported access type for masked load.\n");
9541 return false;
9543 else if (memory_access_type == VMAT_GATHER_SCATTER
9544 && gs_info.ifn == IFN_LAST
9545 && !gs_info.decl)
9547 if (dump_enabled_p ())
9548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9549 "unsupported masked emulated gather.\n");
9550 return false;
9554 bool costing_p = !vec_stmt;
9556 if (costing_p) /* transformation not required. */
9558 if (slp_node
9559 && mask
9560 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9561 mask_vectype))
9563 if (dump_enabled_p ())
9564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9565 "incompatible vector types for invariants\n");
9566 return false;
9569 if (!slp)
9570 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9572 if (loop_vinfo
9573 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9574 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9575 VLS_LOAD, group_size,
9576 memory_access_type, &gs_info,
9577 mask);
9579 if (dump_enabled_p ()
9580 && memory_access_type != VMAT_ELEMENTWISE
9581 && memory_access_type != VMAT_GATHER_SCATTER
9582 && alignment_support_scheme != dr_aligned)
9583 dump_printf_loc (MSG_NOTE, vect_location,
9584 "Vectorizing an unaligned access.\n");
9586 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9587 vinfo->any_known_not_updated_vssa = true;
9589 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9592 if (!slp)
9593 gcc_assert (memory_access_type
9594 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9596 if (dump_enabled_p () && !costing_p)
9597 dump_printf_loc (MSG_NOTE, vect_location,
9598 "transform load. ncopies = %d\n", ncopies);
9600 /* Transform. */
9602 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9603 ensure_base_align (dr_info);
9605 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9607 vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
9608 mask, cost_vec);
9609 return true;
9612 if (memory_access_type == VMAT_INVARIANT)
9614 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9615 /* If we have versioned for aliasing or the loop doesn't
9616 have any data dependencies that would preclude this,
9617 then we are sure this is a loop invariant load and
9618 thus we can insert it on the preheader edge.
9619 TODO: hoist_defs_of_uses should ideally be computed
9620 once at analysis time, remembered and used at
9621 transform time. */
9622 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9623 && !nested_in_vect_loop
9624 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
9625 if (costing_p)
9627 enum vect_cost_model_location cost_loc
9628 = hoist_p ? vect_prologue : vect_body;
9629 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
9630 stmt_info, 0, cost_loc);
9631 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
9632 cost_loc);
9633 unsigned int prologue_cost = hoist_p ? cost : 0;
9634 unsigned int inside_cost = hoist_p ? 0 : cost;
9635 if (dump_enabled_p ())
9636 dump_printf_loc (MSG_NOTE, vect_location,
9637 "vect_model_load_cost: inside_cost = %d, "
9638 "prologue_cost = %d .\n",
9639 inside_cost, prologue_cost);
9640 return true;
9642 if (hoist_p)
9644 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9645 if (dump_enabled_p ())
9646 dump_printf_loc (MSG_NOTE, vect_location,
9647 "hoisting out of the vectorized loop: %G",
9648 (gimple *) stmt);
9649 scalar_dest = copy_ssa_name (scalar_dest);
9650 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9651 edge pe = loop_preheader_edge (loop);
9652 gphi *vphi = get_virtual_phi (loop->header);
9653 tree vuse;
9654 if (vphi)
9655 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9656 else
9657 vuse = gimple_vuse (gsi_stmt (*gsi));
9658 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9659 gimple_set_vuse (new_stmt, vuse);
9660 gsi_insert_on_edge_immediate (pe, new_stmt);
9662 /* These copies are all equivalent. */
9663 if (hoist_p)
9664 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9665 vectype, NULL);
9666 else
9668 gimple_stmt_iterator gsi2 = *gsi;
9669 gsi_next (&gsi2);
9670 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9671 vectype, &gsi2);
9673 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9674 if (slp)
9675 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
9676 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9677 else
9679 for (j = 0; j < ncopies; ++j)
9680 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9681 *vec_stmt = new_stmt;
9683 return true;
9686 if (memory_access_type == VMAT_ELEMENTWISE
9687 || memory_access_type == VMAT_STRIDED_SLP)
9689 gimple_stmt_iterator incr_gsi;
9690 bool insert_after;
9691 tree offvar;
9692 tree ivstep;
9693 tree running_off;
9694 vec<constructor_elt, va_gc> *v = NULL;
9695 tree stride_base, stride_step, alias_off;
9696 /* Checked by get_load_store_type. */
9697 unsigned int const_nunits = nunits.to_constant ();
9698 unsigned HOST_WIDE_INT cst_offset = 0;
9699 tree dr_offset;
9700 unsigned int inside_cost = 0;
9702 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9703 gcc_assert (!nested_in_vect_loop);
9705 if (grouped_load)
9707 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9708 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9710 else
9712 first_stmt_info = stmt_info;
9713 first_dr_info = dr_info;
9716 if (slp && grouped_load)
9718 group_size = DR_GROUP_SIZE (first_stmt_info);
9719 ref_type = get_group_alias_ptr_type (first_stmt_info);
9721 else
9723 if (grouped_load)
9724 cst_offset
9725 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9726 * vect_get_place_in_interleaving_chain (stmt_info,
9727 first_stmt_info));
9728 group_size = 1;
9729 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9732 if (!costing_p)
9734 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9735 stride_base = fold_build_pointer_plus (
9736 DR_BASE_ADDRESS (first_dr_info->dr),
9737 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
9738 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9739 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9741 /* For a load with loop-invariant (but other than power-of-2)
9742 stride (i.e. not a grouped access) like so:
9744 for (i = 0; i < n; i += stride)
9745 ... = array[i];
9747 we generate a new induction variable and new accesses to
9748 form a new vector (or vectors, depending on ncopies):
9750 for (j = 0; ; j += VF*stride)
9751 tmp1 = array[j];
9752 tmp2 = array[j + stride];
9754 vectemp = {tmp1, tmp2, ...}
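/* As a worked instance of the scheme above (purely illustrative): with
   stride == 3 and VF == 4, one vector iteration loads array[j],
   array[j + 3], array[j + 6] and array[j + 9], builds a four-element
   vector from the results, and advances j by VF * stride == 12. */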
9757 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9758 build_int_cst (TREE_TYPE (stride_step), vf));
9760 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9762 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9763 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9764 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9765 loop, &incr_gsi, insert_after,
9766 &offvar, NULL);
9768 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9771 running_off = offvar;
9772 alias_off = build_int_cst (ref_type, 0);
9773 int nloads = const_nunits;
9774 int lnel = 1;
9775 tree ltype = TREE_TYPE (vectype);
9776 tree lvectype = vectype;
9777 auto_vec<tree> dr_chain;
9778 if (memory_access_type == VMAT_STRIDED_SLP)
9780 if (group_size < const_nunits)
9782 /* First check if vec_init optab supports construction from vector
9783 elts directly. Otherwise avoid emitting a constructor of
9784 vector elements by performing the loads using an integer type
9785 of the same size, constructing a vector of those and then
9786 re-interpreting it as the original vector type. This avoids a
9787 huge runtime penalty due to the general inability to perform
9788 store forwarding from smaller stores to a larger load. */
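/* One possible outcome of the above (depending on what
   vector_vector_composition_type reports as supported): for a V8HI
   vectype and group_size == 2 we ask for const_nunits / group_size == 4
   parts, e.g. a vector of four 32-bit integers.  Each of the four loads
   then fetches one whole group (two 16-bit elements) as a single 32-bit
   scalar, the four results are combined into that vector, and it is
   VIEW_CONVERTed back to V8HI below. */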
9789 tree ptype;
9790 tree vtype
9791 = vector_vector_composition_type (vectype,
9792 const_nunits / group_size,
9793 &ptype);
9794 if (vtype != NULL_TREE)
9796 nloads = const_nunits / group_size;
9797 lnel = group_size;
9798 lvectype = vtype;
9799 ltype = ptype;
9802 else
9804 nloads = 1;
9805 lnel = const_nunits;
9806 ltype = vectype;
9808 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9810 /* Load vector(1) scalar_type directly if the vectype has just one element. */
9811 else if (nloads == 1)
9812 ltype = vectype;
9814 if (slp)
9816 /* For SLP permutation support we need to load the whole group,
9817 not only the number of vector stmts the permutation result
9818 fits in. */
9819 if (slp_perm)
9821 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9822 variable VF. */
9823 unsigned int const_vf = vf.to_constant ();
9824 ncopies = CEIL (group_size * const_vf, const_nunits);
9825 dr_chain.create (ncopies);
9827 else
9828 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9830 unsigned int group_el = 0;
9831 unsigned HOST_WIDE_INT
9832 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9833 unsigned int n_groups = 0;
9834 for (j = 0; j < ncopies; j++)
9836 if (nloads > 1 && !costing_p)
9837 vec_alloc (v, nloads);
9838 gimple *new_stmt = NULL;
9839 for (i = 0; i < nloads; i++)
9841 if (costing_p)
9843 if (VECTOR_TYPE_P (ltype))
9844 vect_get_load_cost (vinfo, stmt_info, 1,
9845 alignment_support_scheme, misalignment,
9846 false, &inside_cost, nullptr, cost_vec,
9847 cost_vec, true);
9848 else
9849 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
9850 stmt_info, 0, vect_body);
9851 continue;
9853 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9854 group_el * elsz + cst_offset);
9855 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9856 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9857 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9858 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9859 if (nloads > 1)
9860 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9861 gimple_assign_lhs (new_stmt));
9863 group_el += lnel;
9864 if (! slp
9865 || group_el == group_size)
9867 n_groups++;
9868 /* When doing SLP make sure to not load elements from
9869 the next vector iteration; those will not be accessed,
9870 so just use the last element again. See PR107451. */
9871 if (!slp || known_lt (n_groups, vf))
9873 tree newoff = copy_ssa_name (running_off);
9874 gimple *incr
9875 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9876 running_off, stride_step);
9877 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9878 running_off = newoff;
9880 group_el = 0;
9884 if (nloads > 1)
9886 if (costing_p)
9887 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
9888 stmt_info, 0, vect_body);
9889 else
9891 tree vec_inv = build_constructor (lvectype, v);
9892 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
9893 lvectype, gsi);
9894 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9895 if (lvectype != vectype)
9897 new_stmt
9898 = gimple_build_assign (make_ssa_name (vectype),
9899 VIEW_CONVERT_EXPR,
9900 build1 (VIEW_CONVERT_EXPR,
9901 vectype, new_temp));
9902 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
9903 gsi);
9908 if (!costing_p)
9910 if (slp)
9912 if (slp_perm)
9913 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9914 else
9915 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9917 else
9919 if (j == 0)
9920 *vec_stmt = new_stmt;
9921 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9925 if (slp_perm)
9927 unsigned n_perms;
9928 if (costing_p)
9930 unsigned n_loads;
9931 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
9932 true, &n_perms, &n_loads);
9933 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
9934 first_stmt_info, 0, vect_body);
9936 else
9937 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9938 false, &n_perms);
9941 if (costing_p && dump_enabled_p ())
9942 dump_printf_loc (MSG_NOTE, vect_location,
9943 "vect_model_load_cost: inside_cost = %u, "
9944 "prologue_cost = 0 .\n",
9945 inside_cost);
9947 return true;
9950 if (memory_access_type == VMAT_GATHER_SCATTER
9951 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9952 grouped_load = false;
9954 if (grouped_load
9955 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
9957 if (grouped_load)
9959 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9960 group_size = DR_GROUP_SIZE (first_stmt_info);
9962 else
9964 first_stmt_info = stmt_info;
9965 group_size = 1;
9967 /* For SLP vectorization we directly vectorize a subchain
9968 without permutation. */
9969 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9970 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9971 /* For BB vectorization always use the first stmt to base
9972 the data ref pointer on. */
9973 if (bb_vinfo)
9974 first_stmt_info_for_drptr
9975 = vect_find_first_scalar_stmt_in_slp (slp_node);
9977 /* Check if the chain of loads is already vectorized. */
9978 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9979 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9980 ??? But we can only do so if there is exactly one
9981 as we have no way to get at the rest. Leave the CSE
9982 opportunity alone.
9983 ??? With the group load eventually participating
9984 in multiple different permutations (having multiple
9985 slp nodes which refer to the same group) the CSE
9986 is even wrong code. See PR56270. */
9987 && !slp)
9989 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9990 return true;
9992 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9993 group_gap_adj = 0;
9995 /* VEC_NUM is the number of vect stmts to be created for this group. */
9996 if (slp)
9998 grouped_load = false;
9999 /* If an SLP permutation is from N elements to N elements,
10000 and if one vector holds a whole number of N, we can load
10001 the inputs to the permutation in the same way as an
10002 unpermuted sequence. In other cases we need to load the
10003 whole group, not only the number of vector stmts the
10004 permutation result fits in. */
10005 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10006 if (slp_perm
10007 && (group_size != scalar_lanes
10008 || !multiple_p (nunits, group_size)))
10010 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10011 variable VF; see vect_transform_slp_perm_load. */
10012 unsigned int const_vf = vf.to_constant ();
10013 unsigned int const_nunits = nunits.to_constant ();
10014 vec_num = CEIL (group_size * const_vf, const_nunits);
10015 group_gap_adj = vf * group_size - nunits * vec_num;
10017 else
10019 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10020 group_gap_adj
10021 = group_size - scalar_lanes;
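/* Worked example for the computation just above (numbers invented): if
   the interleaving group has group_size == 4 scalar loads but the SLP
   node only covers scalar_lanes == 2 of them, GROUP_GAP_ADJ is
   4 - 2 == 2, i.e. two group elements per group that the code generated
   further down steps over instead of loading into the result vectors. */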
10024 else
10025 vec_num = group_size;
10027 ref_type = get_group_alias_ptr_type (first_stmt_info);
10029 else
10031 first_stmt_info = stmt_info;
10032 first_dr_info = dr_info;
10033 group_size = vec_num = 1;
10034 group_gap_adj = 0;
10035 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10036 if (slp)
10037 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10040 gcc_assert (alignment_support_scheme);
10041 vec_loop_masks *loop_masks
10042 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10043 ? &LOOP_VINFO_MASKS (loop_vinfo)
10044 : NULL);
10045 vec_loop_lens *loop_lens
10046 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10047 ? &LOOP_VINFO_LENS (loop_vinfo)
10048 : NULL);
10050 /* Shouldn't go with length-based approach if fully masked. */
10051 gcc_assert (!loop_lens || !loop_masks);
10053 /* Targets with load-lane instructions must not require explicit
10054 realignment. vect_supportable_dr_alignment always returns either
10055 dr_aligned or dr_unaligned_supported for masked operations. */
10056 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10057 && !mask
10058 && !loop_masks)
10059 || alignment_support_scheme == dr_aligned
10060 || alignment_support_scheme == dr_unaligned_supported);
10062 /* In case the vectorization factor (VF) is bigger than the number
10063 of elements that we can fit in a vectype (nunits), we have to generate
10064 more than one vector stmt, i.e. we need to "unroll" the
10065 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10066 from one copy of the vector stmt to the next, in the field
10067 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10068 stages to find the correct vector defs to be used when vectorizing
10069 stmts that use the defs of the current stmt. The example below
10070 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10071 need to create 4 vectorized stmts):
10073 before vectorization:
10074 RELATED_STMT VEC_STMT
10075 S1: x = memref - -
10076 S2: z = x + 1 - -
10078 step 1: vectorize stmt S1:
10079 We first create the vector stmt VS1_0, and, as usual, record a
10080 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10081 Next, we create the vector stmt VS1_1, and record a pointer to
10082 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10083 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10084 stmts and pointers:
10085 RELATED_STMT VEC_STMT
10086 VS1_0: vx0 = memref0 VS1_1 -
10087 VS1_1: vx1 = memref1 VS1_2 -
10088 VS1_2: vx2 = memref2 VS1_3 -
10089 VS1_3: vx3 = memref3 - -
10090 S1: x = load - VS1_0
10091 S2: z = x + 1 - -
10094 /* In case of interleaving (non-unit grouped access):
10096 S1: x2 = &base + 2
10097 S2: x0 = &base
10098 S3: x1 = &base + 1
10099 S4: x3 = &base + 3
10101 Vectorized loads are created in the order of memory accesses
10102 starting from the access of the first stmt of the chain:
10104 VS1: vx0 = &base
10105 VS2: vx1 = &base + vec_size*1
10106 VS3: vx2 = &base + vec_size*2
10107 VS4: vx3 = &base + vec_size*3
10109 Then permutation statements are generated:
10111 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10112 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10115 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10116 (the order of the data-refs in the output of vect_permute_load_chain
10117 corresponds to the order of scalar stmts in the interleaving chain - see
10118 the documentation of vect_permute_load_chain()).
10119 The generation of permutation stmts and recording them in
10120 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10122 In case of both multiple types and interleaving, the vector loads and
10123 permutation stmts above are created for every copy. The result vector
10124 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10125 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10127 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10128 on a target that supports unaligned accesses (dr_unaligned_supported)
10129 we generate the following code:
10130 p = initial_addr;
10131 indx = 0;
10132 loop {
10133 p = p + indx * vectype_size;
10134 vec_dest = *(p);
10135 indx = indx + 1;
10138 Otherwise, the data reference is potentially unaligned on a target that
10139 does not support unaligned accesses (dr_explicit_realign_optimized) -
10140 then generate the following code, in which the data in each iteration is
10141 obtained by two vector loads, one from the previous iteration, and one
10142 from the current iteration:
10143 p1 = initial_addr;
10144 msq_init = *(floor(p1))
10145 p2 = initial_addr + VS - 1;
10146 realignment_token = call target_builtin;
10147 indx = 0;
10148 loop {
10149 p2 = p2 + indx * vectype_size
10150 lsq = *(floor(p2))
10151 vec_dest = realign_load (msq, lsq, realignment_token)
10152 indx = indx + 1;
10153 msq = lsq;
10154 } */
10156 /* If the misalignment remains the same throughout the execution of the
10157 loop, we can create the init_addr and permutation mask at the loop
10158 preheader. Otherwise, it needs to be created inside the loop.
10159 This can only occur when vectorizing memory accesses in the inner-loop
10160 nested within an outer-loop that is being vectorized. */
10162 if (nested_in_vect_loop
10163 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10164 GET_MODE_SIZE (TYPE_MODE (vectype))))
10166 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10167 compute_in_loop = true;
10170 bool diff_first_stmt_info
10171 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10173 tree offset = NULL_TREE;
10174 if ((alignment_support_scheme == dr_explicit_realign_optimized
10175 || alignment_support_scheme == dr_explicit_realign)
10176 && !compute_in_loop)
10178 /* If we have a different first_stmt_info, we can't set up realignment
10179 here, since we can't guarantee that the first_stmt_info DR has been
10180 initialized yet; use the first_stmt_info_for_drptr DR by bumping the
10181 distance from the first_stmt_info DR instead, as below. */
10182 if (!costing_p)
10184 if (!diff_first_stmt_info)
10185 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10186 &realignment_token,
10187 alignment_support_scheme, NULL_TREE,
10188 &at_loop);
10189 if (alignment_support_scheme == dr_explicit_realign_optimized)
10191 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10192 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10193 size_one_node);
10194 gcc_assert (!first_stmt_info_for_drptr);
10198 else
10199 at_loop = loop;
10201 if (!known_eq (poffset, 0))
10202 offset = (offset
10203 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10204 : size_int (poffset));
10206 tree bump;
10207 tree vec_offset = NULL_TREE;
10208 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10210 aggr_type = NULL_TREE;
10211 bump = NULL_TREE;
10213 else if (memory_access_type == VMAT_GATHER_SCATTER)
10215 aggr_type = elem_type;
10216 if (!costing_p)
10217 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10218 &bump, &vec_offset, loop_lens);
10220 else
10222 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10223 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10224 else
10225 aggr_type = vectype;
10226 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10227 memory_access_type, loop_lens);
10230 auto_vec<tree> vec_offsets;
10231 auto_vec<tree> vec_masks;
10232 if (mask && !costing_p)
10234 if (slp_node)
10235 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10236 &vec_masks);
10237 else
10238 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10239 &vec_masks, mask_vectype);
10241 tree vec_mask = NULL_TREE;
10242 poly_uint64 group_elt = 0;
10243 unsigned int inside_cost = 0, prologue_cost = 0;
10244 for (j = 0; j < ncopies; j++)
10246 /* 1. Create the vector or array pointer update chain. */
10247 if (j == 0 && !costing_p)
10249 bool simd_lane_access_p
10250 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
10251 if (simd_lane_access_p
10252 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
10253 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
10254 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
10255 && integer_zerop (DR_INIT (first_dr_info->dr))
10256 && alias_sets_conflict_p (get_alias_set (aggr_type),
10257 get_alias_set (TREE_TYPE (ref_type)))
10258 && (alignment_support_scheme == dr_aligned
10259 || alignment_support_scheme == dr_unaligned_supported))
10261 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
10262 dataref_offset = build_int_cst (ref_type, 0);
10264 else if (diff_first_stmt_info)
10266 dataref_ptr
10267 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
10268 aggr_type, at_loop, offset, &dummy,
10269 gsi, &ptr_incr, simd_lane_access_p,
10270 bump);
10271 /* Adjust the pointer by the difference to first_stmt. */
10272 data_reference_p ptrdr
10273 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
10274 tree diff
10275 = fold_convert (sizetype,
10276 size_binop (MINUS_EXPR,
10277 DR_INIT (first_dr_info->dr),
10278 DR_INIT (ptrdr)));
10279 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10280 stmt_info, diff);
10281 if (alignment_support_scheme == dr_explicit_realign)
10283 msq = vect_setup_realignment (vinfo,
10284 first_stmt_info_for_drptr, gsi,
10285 &realignment_token,
10286 alignment_support_scheme,
10287 dataref_ptr, &at_loop);
10288 gcc_assert (!compute_in_loop);
10291 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10293 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10294 slp_node, &gs_info, &dataref_ptr,
10295 &vec_offsets);
10297 else
10298 dataref_ptr
10299 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10300 at_loop,
10301 offset, &dummy, gsi, &ptr_incr,
10302 simd_lane_access_p, bump);
10303 if (mask)
10304 vec_mask = vec_masks[0];
10306 else if (!costing_p)
10308 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10309 if (dataref_offset)
10310 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
10311 bump);
10312 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10313 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10314 stmt_info, bump);
10315 if (mask)
10316 vec_mask = vec_masks[j];
10319 if (grouped_load || slp_perm)
10320 dr_chain.create (vec_num);
10322 gimple *new_stmt = NULL;
10323 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10325 if (costing_p)
10327 /* An IFN_LOAD_LANES will load all its vector results,
10328 regardless of which ones we actually need. Account
10329 for the cost of unused results. */
10330 if (grouped_load && first_stmt_info == stmt_info)
10332 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10333 stmt_vec_info next_stmt_info = first_stmt_info;
10336 gaps -= 1;
10337 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10339 while (next_stmt_info);
10340 if (gaps)
10342 if (dump_enabled_p ())
10343 dump_printf_loc (MSG_NOTE, vect_location,
10344 "vect_model_load_cost: %d "
10345 "unused vectors.\n",
10346 gaps);
10347 vect_get_load_cost (vinfo, stmt_info, gaps,
10348 alignment_support_scheme,
10349 misalignment, false, &inside_cost,
10350 &prologue_cost, cost_vec, cost_vec,
10351 true);
10354 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10355 misalignment, false, &inside_cost,
10356 &prologue_cost, cost_vec, cost_vec, true);
10357 continue;
10359 tree vec_array;
10361 vec_array = create_vector_array (vectype, vec_num);
10363 tree final_mask = NULL_TREE;
10364 if (loop_masks)
10365 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10366 ncopies, vectype, j);
10367 if (vec_mask)
10368 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10369 final_mask, vec_mask, gsi);
10371 gcall *call;
10372 if (final_mask)
10374 /* Emit:
10375 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10376 VEC_MASK). */
10377 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10378 tree alias_ptr = build_int_cst (ref_type, align);
10379 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10380 dataref_ptr, alias_ptr,
10381 final_mask);
10383 else
10385 /* Emit:
10386 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10387 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10388 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10390 gimple_call_set_lhs (call, vec_array);
10391 gimple_call_set_nothrow (call, true);
10392 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10393 new_stmt = call;
10395 /* Extract each vector into an SSA_NAME. */
10396 for (i = 0; i < vec_num; i++)
10398 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10399 vec_array, i);
10400 dr_chain.quick_push (new_temp);
10403 /* Record the mapping between SSA_NAMEs and statements. */
10404 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10406 /* Record that VEC_ARRAY is now dead. */
10407 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10409 else
10411 for (i = 0; i < vec_num; i++)
10413 tree final_mask = NULL_TREE;
10414 tree final_len = NULL_TREE;
10415 tree bias = NULL_TREE;
10416 if (!costing_p)
10418 if (loop_masks && memory_access_type != VMAT_INVARIANT)
10419 final_mask
10420 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10421 vec_num * ncopies, vectype,
10422 vec_num * j + i);
10423 if (vec_mask)
10424 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10425 final_mask, vec_mask, gsi);
10427 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10428 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10429 gsi, stmt_info, bump);
10432 /* 2. Create the vector-load in the loop. */
10433 switch (alignment_support_scheme)
10435 case dr_aligned:
10436 case dr_unaligned_supported:
10438 unsigned int misalign;
10439 unsigned HOST_WIDE_INT align;
10441 if (memory_access_type == VMAT_GATHER_SCATTER
10442 && gs_info.ifn != IFN_LAST)
10444 if (costing_p)
10446 unsigned int cnunits
10447 = vect_nunits_for_cost (vectype);
10448 inside_cost
10449 = record_stmt_cost (cost_vec, cnunits,
10450 scalar_load, stmt_info, 0,
10451 vect_body);
10452 break;
10454 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10455 vec_offset = vec_offsets[vec_num * j + i];
10456 tree zero = build_zero_cst (vectype);
10457 tree scale = size_int (gs_info.scale);
10459 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10461 if (loop_lens)
10462 final_len
10463 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10464 vec_num * ncopies, vectype,
10465 vec_num * j + i, 1);
10466 else
10467 final_len = build_int_cst (sizetype,
10468 TYPE_VECTOR_SUBPARTS (
10469 vectype));
10470 signed char biasval
10471 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10472 bias = build_int_cst (intQI_type_node, biasval);
10473 if (!final_mask)
10475 mask_vectype = truth_type_for (vectype);
10476 final_mask = build_minus_one_cst (mask_vectype);
10480 gcall *call;
10481 if (final_len && final_mask)
10482 call = gimple_build_call_internal (
10483 IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr,
10484 vec_offset, scale, zero, final_mask, final_len,
10485 bias);
10486 else if (final_mask)
10487 call = gimple_build_call_internal
10488 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
10489 vec_offset, scale, zero, final_mask);
10490 else
10491 call = gimple_build_call_internal
10492 (IFN_GATHER_LOAD, 4, dataref_ptr,
10493 vec_offset, scale, zero);
10494 gimple_call_set_nothrow (call, true);
10495 new_stmt = call;
10496 data_ref = NULL_TREE;
10497 break;
10499 else if (memory_access_type == VMAT_GATHER_SCATTER)
10501 /* Emulated gather-scatter. */
10502 gcc_assert (!final_mask);
10503 unsigned HOST_WIDE_INT const_nunits
10504 = nunits.to_constant ();
10505 if (costing_p)
10507 /* For emulated gathers, cost N offset vector element
10508 extracts; the offset add is consumed by the load. */
10509 inside_cost
10510 = record_stmt_cost (cost_vec, const_nunits,
10511 vec_to_scalar, stmt_info, 0,
10512 vect_body);
10513 /* N scalar loads plus gathering them into a
10514 vector. */
10515 inside_cost
10516 = record_stmt_cost (cost_vec, const_nunits,
10517 scalar_load, stmt_info, 0,
10518 vect_body);
10519 inside_cost
10520 = record_stmt_cost (cost_vec, 1, vec_construct,
10521 stmt_info, 0, vect_body);
10522 break;
10524 unsigned HOST_WIDE_INT const_offset_nunits
10525 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10526 .to_constant ();
10527 vec<constructor_elt, va_gc> *ctor_elts;
10528 vec_alloc (ctor_elts, const_nunits);
10529 gimple_seq stmts = NULL;
10530 /* We support offset vectors with more elements
10531 than the data vector for now. */
10532 unsigned HOST_WIDE_INT factor
10533 = const_offset_nunits / const_nunits;
10534 vec_offset = vec_offsets[j / factor];
10535 unsigned elt_offset = (j % factor) * const_nunits;
10536 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10537 tree scale = size_int (gs_info.scale);
10538 align
10539 = get_object_alignment (DR_REF (first_dr_info->dr));
10540 tree ltype = build_aligned_type (TREE_TYPE (vectype),
10541 align);
10542 for (unsigned k = 0; k < const_nunits; ++k)
10544 tree boff = size_binop (MULT_EXPR,
10545 TYPE_SIZE (idx_type),
10546 bitsize_int
10547 (k + elt_offset));
10548 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
10549 idx_type, vec_offset,
10550 TYPE_SIZE (idx_type),
10551 boff);
10552 idx = gimple_convert (&stmts, sizetype, idx);
10553 idx = gimple_build (&stmts, MULT_EXPR,
10554 sizetype, idx, scale);
10555 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10556 TREE_TYPE (dataref_ptr),
10557 dataref_ptr, idx);
10558 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10559 tree elt = make_ssa_name (TREE_TYPE (vectype));
10560 tree ref = build2 (MEM_REF, ltype, ptr,
10561 build_int_cst (ref_type, 0));
10562 new_stmt = gimple_build_assign (elt, ref);
10563 gimple_set_vuse (new_stmt,
10564 gimple_vuse (gsi_stmt (*gsi)));
10565 gimple_seq_add_stmt (&stmts, new_stmt);
10566 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10568 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10569 new_stmt = gimple_build_assign (NULL_TREE,
10570 build_constructor
10571 (vectype, ctor_elts));
10572 data_ref = NULL_TREE;
10573 break;
10576 if (costing_p)
10577 break;
10579 align =
10580 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10581 if (alignment_support_scheme == dr_aligned)
10582 misalign = 0;
10583 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10585 align = dr_alignment
10586 (vect_dr_behavior (vinfo, first_dr_info));
10587 misalign = 0;
10589 else
10590 misalign = misalignment;
10591 if (dataref_offset == NULL_TREE
10592 && TREE_CODE (dataref_ptr) == SSA_NAME)
10593 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
10594 align, misalign);
10595 align = least_bit_hwi (misalign | align);
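/* ALIGN is now the alignment in bytes we can rely on: the least set
   bit of MISALIGN | ALIGN, e.g. a target alignment of 16 together
   with a known misalignment of 4 yields a guaranteed alignment of
   4 bytes.  */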
10597 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
10598 machine_mode vmode = TYPE_MODE (vectype);
10599 machine_mode new_vmode = vmode;
10600 internal_fn partial_ifn = IFN_LAST;
10601 if (loop_lens)
10603 opt_machine_mode new_ovmode
10604 = get_len_load_store_mode (vmode, true,
10605 &partial_ifn);
10606 new_vmode = new_ovmode.require ();
10607 unsigned factor = (new_ovmode == vmode)
10609 : GET_MODE_UNIT_SIZE (vmode);
10610 final_len
10611 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10612 vec_num * ncopies, vectype,
10613 vec_num * j + i, factor);
10615 else if (final_mask)
10617 if (!can_vec_mask_load_store_p (
10618 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
10619 &partial_ifn))
10620 gcc_unreachable ();
10623 if (partial_ifn == IFN_MASK_LEN_LOAD)
10625 if (!final_len)
10627 /* Pass the VF value to the 'len' argument of
10628 MASK_LEN_LOAD if LOOP_LENS is invalid. */
10629 tree iv_type
10630 = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
10631 final_len
10632 = build_int_cst (iv_type,
10633 TYPE_VECTOR_SUBPARTS (vectype));
10635 if (!final_mask)
10637 /* Pass an all-ones value to the 'mask' argument of
10638 MASK_LEN_LOAD if final_mask is invalid. */
10639 mask_vectype = truth_type_for (vectype);
10640 final_mask = build_minus_one_cst (mask_vectype);
10643 if (final_len)
10645 signed char biasval
10646 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10648 bias = build_int_cst (intQI_type_node, biasval);
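/* When FINAL_LEN and/or FINAL_MASK are present the load is emitted
   as an internal-function call, roughly one of
     _v = .MASK_LEN_LOAD (dataref_ptr, align, mask, len, bias);
     _v = .LEN_LOAD (dataref_ptr, align, len, bias);
     _v = .MASK_LOAD (dataref_ptr, align, mask);  */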
10651 if (final_len && memory_access_type != VMAT_INVARIANT)
10653 tree ptr
10654 = build_int_cst (ref_type, align * BITS_PER_UNIT);
10655 gcall *call;
10656 if (partial_ifn == IFN_MASK_LEN_LOAD)
10657 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
10658 5, dataref_ptr,
10659 ptr, final_mask,
10660 final_len, bias);
10661 else
10662 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10663 dataref_ptr, ptr,
10664 final_len, bias);
10665 gimple_call_set_nothrow (call, true);
10666 new_stmt = call;
10667 data_ref = NULL_TREE;
10669 /* Need conversion if it's wrapped with VnQI. */
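/* That is, if the target handles the len load in a VnQImode vector,
   the call result is a VnQI value that is VIEW_CONVERTed back to
   VECTYPE below.  */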
10670 if (vmode != new_vmode)
10672 tree new_vtype = build_vector_type_for_mode (
10673 unsigned_intQI_type_node, new_vmode);
10674 tree var = vect_get_new_ssa_name (new_vtype,
10675 vect_simple_var);
10676 gimple_set_lhs (call, var);
10677 vect_finish_stmt_generation (vinfo, stmt_info, call,
10678 gsi);
10679 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10680 new_stmt
10681 = gimple_build_assign (vec_dest,
10682 VIEW_CONVERT_EXPR, op);
10685 else if (final_mask)
10687 tree ptr = build_int_cst (ref_type,
10688 align * BITS_PER_UNIT);
10689 gcall *call
10690 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10691 dataref_ptr, ptr,
10692 final_mask);
10693 gimple_call_set_nothrow (call, true);
10694 new_stmt = call;
10695 data_ref = NULL_TREE;
10697 else
10699 tree ltype = vectype;
10700 tree new_vtype = NULL_TREE;
10701 unsigned HOST_WIDE_INT gap
10702 = DR_GROUP_GAP (first_stmt_info);
10703 unsigned int vect_align
10704 = vect_known_alignment_in_bytes (first_dr_info,
10705 vectype);
10706 unsigned int scalar_dr_size
10707 = vect_get_scalar_dr_size (first_dr_info);
10708 /* If there's no peeling for gaps but we have a gap
10709 with slp loads then load the lower half of the
10710 vector only. See get_group_load_store_type for
10711 when we apply this optimization. */
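/* For example, with a 4-element vector, GROUP_SIZE == 4 and GAP == 2
   only the low half is loaded and the missing half is padded with
   zeros by the CONSTRUCTOR built further down.  */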
10712 if (slp
10713 && loop_vinfo
10714 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
10715 && gap != 0
10716 && known_eq (nunits, (group_size - gap) * 2)
10717 && known_eq (nunits, group_size)
10718 && gap >= (vect_align / scalar_dr_size))
10720 tree half_vtype;
10721 new_vtype
10722 = vector_vector_composition_type (vectype, 2,
10723 &half_vtype);
10724 if (new_vtype != NULL_TREE)
10725 ltype = half_vtype;
10727 tree offset
10728 = (dataref_offset ? dataref_offset
10729 : build_int_cst (ref_type, 0));
10730 if (ltype != vectype
10731 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10733 unsigned HOST_WIDE_INT gap_offset
10734 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10735 tree gapcst = build_int_cst (ref_type, gap_offset);
10736 offset = size_binop (PLUS_EXPR, offset, gapcst);
10738 data_ref
10739 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10740 if (alignment_support_scheme == dr_aligned)
10742 else
10743 TREE_TYPE (data_ref)
10744 = build_aligned_type (TREE_TYPE (data_ref),
10745 align * BITS_PER_UNIT);
10746 if (ltype != vectype)
10748 vect_copy_ref_info (data_ref,
10749 DR_REF (first_dr_info->dr));
10750 tree tem = make_ssa_name (ltype);
10751 new_stmt = gimple_build_assign (tem, data_ref);
10752 vect_finish_stmt_generation (vinfo, stmt_info,
10753 new_stmt, gsi);
10754 data_ref = NULL;
10755 vec<constructor_elt, va_gc> *v;
10756 vec_alloc (v, 2);
10757 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10759 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10760 build_zero_cst (ltype));
10761 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10763 else
10765 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10766 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10767 build_zero_cst (ltype));
10769 gcc_assert (new_vtype != NULL_TREE);
10770 if (new_vtype == vectype)
10771 new_stmt = gimple_build_assign (
10772 vec_dest, build_constructor (vectype, v));
10773 else
10775 tree new_vname = make_ssa_name (new_vtype);
10776 new_stmt = gimple_build_assign (
10777 new_vname, build_constructor (new_vtype, v));
10778 vect_finish_stmt_generation (vinfo, stmt_info,
10779 new_stmt, gsi);
10780 new_stmt = gimple_build_assign (
10781 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
10782 new_vname));
10786 break;
10788 case dr_explicit_realign:
10790 if (costing_p)
10791 break;
10792 tree ptr, bump;
10794 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10796 if (compute_in_loop)
10797 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10798 &realignment_token,
10799 dr_explicit_realign,
10800 dataref_ptr, NULL);
10802 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10803 ptr = copy_ssa_name (dataref_ptr);
10804 else
10805 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10806 // For explicit realign the target alignment should be
10807 // known at compile time.
10808 unsigned HOST_WIDE_INT align =
10809 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
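/* Floor-align the pointer: PTR = DATAREF_PTR & -ALIGN, e.g. with a
   16-byte target alignment the mask is ~15, rounding the address
   down to the previous 16-byte boundary.  */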
10810 new_stmt = gimple_build_assign
10811 (ptr, BIT_AND_EXPR, dataref_ptr,
10812 build_int_cst
10813 (TREE_TYPE (dataref_ptr),
10814 -(HOST_WIDE_INT) align));
10815 vect_finish_stmt_generation (vinfo, stmt_info,
10816 new_stmt, gsi);
10817 data_ref
10818 = build2 (MEM_REF, vectype, ptr,
10819 build_int_cst (ref_type, 0));
10820 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10821 vec_dest = vect_create_destination_var (scalar_dest,
10822 vectype);
10823 new_stmt = gimple_build_assign (vec_dest, data_ref);
10824 new_temp = make_ssa_name (vec_dest, new_stmt);
10825 gimple_assign_set_lhs (new_stmt, new_temp);
10826 gimple_move_vops (new_stmt, stmt_info->stmt);
10827 vect_finish_stmt_generation (vinfo, stmt_info,
10828 new_stmt, gsi);
10829 msq = new_temp;
10831 bump = size_binop (MULT_EXPR, vs,
10832 TYPE_SIZE_UNIT (elem_type));
10833 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10834 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10835 stmt_info, bump);
10836 new_stmt = gimple_build_assign
10837 (NULL_TREE, BIT_AND_EXPR, ptr,
10838 build_int_cst
10839 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10840 if (TREE_CODE (ptr) == SSA_NAME)
10841 ptr = copy_ssa_name (ptr, new_stmt);
10842 else
10843 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10844 gimple_assign_set_lhs (new_stmt, ptr);
10845 vect_finish_stmt_generation (vinfo, stmt_info,
10846 new_stmt, gsi);
10847 data_ref
10848 = build2 (MEM_REF, vectype, ptr,
10849 build_int_cst (ref_type, 0));
10850 break;
10852 case dr_explicit_realign_optimized:
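/* Unlike dr_explicit_realign, the optimized scheme computes the
   first aligned load (MSQ) outside the loop and carries it via a
   PHI, so the loop body only needs the aligned load of the next
   chunk plus the REALIGN_LOAD emitted further below.  */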
10854 if (costing_p)
10855 break;
10856 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10857 new_temp = copy_ssa_name (dataref_ptr);
10858 else
10859 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10860 // We should only be doing this if we know the target
10861 // alignment at compile time.
10862 unsigned HOST_WIDE_INT align =
10863 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10864 new_stmt = gimple_build_assign
10865 (new_temp, BIT_AND_EXPR, dataref_ptr,
10866 build_int_cst (TREE_TYPE (dataref_ptr),
10867 -(HOST_WIDE_INT) align));
10868 vect_finish_stmt_generation (vinfo, stmt_info,
10869 new_stmt, gsi);
10870 data_ref
10871 = build2 (MEM_REF, vectype, new_temp,
10872 build_int_cst (ref_type, 0));
10873 break;
10875 default:
10876 gcc_unreachable ();
10879 /* One common place to cost the vector load above for the different
10880 alignment support schemes. */
10881 if (costing_p)
10883 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load we only
10884 need to take care of the first stmt, whose stmt_info is
10885 first_stmt_info; iterating vec_num times on it covers the
10886 cost for the remaining stmts, which is consistent with the
10887 transform phase. The prologue cost for realign only needs
10888 to be counted once for the whole group. */
10889 bool first_stmt_info_p = first_stmt_info == stmt_info;
10890 bool add_realign_cost = first_stmt_info_p && i == 0;
10891 if (memory_access_type == VMAT_CONTIGUOUS
10892 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
10893 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
10894 && (!grouped_load || first_stmt_info_p)))
10895 vect_get_load_cost (vinfo, stmt_info, 1,
10896 alignment_support_scheme, misalignment,
10897 add_realign_cost, &inside_cost,
10898 &prologue_cost, cost_vec, cost_vec,
10899 true);
10901 else
10903 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10904 /* DATA_REF is null if we've already built the statement. */
10905 if (data_ref)
10907 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10908 new_stmt = gimple_build_assign (vec_dest, data_ref);
10910 new_temp = make_ssa_name (vec_dest, new_stmt);
10911 gimple_set_lhs (new_stmt, new_temp);
10912 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10915 /* 3. Handle explicit realignment if necessary/supported.
10916 Create in loop:
10917 vec_dest = realign_load (msq, lsq, realignment_token) */
10918 if (!costing_p
10919 && (alignment_support_scheme == dr_explicit_realign_optimized
10920 || alignment_support_scheme == dr_explicit_realign))
10922 lsq = gimple_assign_lhs (new_stmt);
10923 if (!realignment_token)
10924 realignment_token = dataref_ptr;
10925 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10926 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10927 msq, lsq, realignment_token);
10928 new_temp = make_ssa_name (vec_dest, new_stmt);
10929 gimple_assign_set_lhs (new_stmt, new_temp);
10930 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10932 if (alignment_support_scheme == dr_explicit_realign_optimized)
10934 gcc_assert (phi);
10935 if (i == vec_num - 1 && j == ncopies - 1)
10936 add_phi_arg (phi, lsq,
10937 loop_latch_edge (containing_loop),
10938 UNKNOWN_LOCATION);
10939 msq = lsq;
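/* A negative-step access loads the elements in memory order, so for
   VMAT_CONTIGUOUS_REVERSE reverse them with a VEC_PERM using the
   { nunits-1, ..., 1, 0 } selector built by perm_mask_for_reverse.  */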
10943 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10945 if (costing_p)
10946 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
10947 stmt_info, 0, vect_body);
10948 else
10950 tree perm_mask = perm_mask_for_reverse (vectype);
10951 new_temp
10952 = permute_vec_elements (vinfo, new_temp, new_temp,
10953 perm_mask, stmt_info, gsi);
10954 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10958 /* Collect vector loads and later create their permutation in
10959 vect_transform_grouped_load (). */
10960 if (!costing_p && (grouped_load || slp_perm))
10961 dr_chain.quick_push (new_temp);
10963 /* Store vector loads in the corresponding SLP_NODE. */
10964 if (!costing_p && slp && !slp_perm)
10965 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10967 /* With SLP permutation we load the gaps as well; without
10968 it we need to skip the gaps after we manage to fully load
10969 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10970 group_elt += nunits;
10971 if (!costing_p
10972 && maybe_ne (group_gap_adj, 0U)
10973 && !slp_perm
10974 && known_eq (group_elt, group_size - group_gap_adj))
10976 poly_wide_int bump_val
10977 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10978 * group_gap_adj);
10979 if (tree_int_cst_sgn
10980 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10981 bump_val = -bump_val;
10982 tree bump = wide_int_to_tree (sizetype, bump_val);
10983 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10984 gsi, stmt_info, bump);
10985 group_elt = 0;
10988 /* Bump the vector pointer to account for a gap or for excess
10989 elements loaded for a permuted SLP load. */
10990 if (!costing_p
10991 && maybe_ne (group_gap_adj, 0U)
10992 && slp_perm)
10994 poly_wide_int bump_val
10995 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10996 * group_gap_adj);
10997 if (tree_int_cst_sgn
10998 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10999 bump_val = -bump_val;
11000 tree bump = wide_int_to_tree (sizetype, bump_val);
11001 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11002 stmt_info, bump);
11006 if (slp && !slp_perm)
11007 continue;
11009 if (slp_perm)
11011 unsigned n_perms;
11012 /* For SLP we know we've seen all possible uses of dr_chain so
11013 direct vect_transform_slp_perm_load to DCE the unused parts.
11014 ??? This is a hack to prevent compile-time issues as seen
11015 in PR101120 and friends. */
11016 if (costing_p)
11018 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11019 true, &n_perms, nullptr);
11020 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11021 stmt_info, 0, vect_body);
11023 else
11025 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11026 gsi, vf, false, &n_perms,
11027 nullptr, true);
11028 gcc_assert (ok);
11031 else
11033 if (grouped_load)
11035 if (memory_access_type != VMAT_LOAD_STORE_LANES)
11037 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11038 /* We assume that the cost of a single load-lanes instruction
11039 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11040 If a grouped access is instead being provided by a
11041 load-and-permute operation, include the cost of the
11042 permutes. */
11043 if (costing_p && first_stmt_info == stmt_info)
11045 /* Uses even and odd extract operations or shuffle
11046 operations for each needed permute. */
11047 int group_size = DR_GROUP_SIZE (first_stmt_info);
11048 int nstmts = ceil_log2 (group_size) * group_size;
11049 inside_cost
11050 += record_stmt_cost (cost_vec, nstmts, vec_perm,
11051 stmt_info, 0, vect_body);
11053 if (dump_enabled_p ())
11054 dump_printf_loc (
11055 MSG_NOTE, vect_location,
11056 "vect_model_load_cost: strided group_size = %d .\n",
11057 group_size);
11059 else if (!costing_p)
11060 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11061 group_size, gsi);
11063 if (!costing_p)
11064 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11066 else if (!costing_p)
11067 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11069 dr_chain.release ();
11071 if (!slp && !costing_p)
11072 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11074 if (costing_p)
11076 gcc_assert (memory_access_type != VMAT_INVARIANT
11077 && memory_access_type != VMAT_ELEMENTWISE
11078 && memory_access_type != VMAT_STRIDED_SLP);
11079 if (dump_enabled_p ())
11080 dump_printf_loc (MSG_NOTE, vect_location,
11081 "vect_model_load_cost: inside_cost = %u, "
11082 "prologue_cost = %u .\n",
11083 inside_cost, prologue_cost);
11086 return true;
11089 /* Function vect_is_simple_cond.
11091 Input:
11092 LOOP - the loop that is being vectorized.
11093 COND - Condition that is checked for simple use.
11095 Output:
11096 *COMP_VECTYPE - the vector type for the comparison.
11097 *DTS - The def types for the arguments of the comparison
11099 Returns whether a COND can be vectorized. Checks whether
11100 condition operands are supportable using vect_is_simple_use. */
11102 static bool
11103 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11104 slp_tree slp_node, tree *comp_vectype,
11105 enum vect_def_type *dts, tree vectype)
11107 tree lhs, rhs;
11108 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11109 slp_tree slp_op;
11111 /* Mask case. */
11112 if (TREE_CODE (cond) == SSA_NAME
11113 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11115 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11116 &slp_op, &dts[0], comp_vectype)
11117 || !*comp_vectype
11118 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11119 return false;
11120 return true;
11123 if (!COMPARISON_CLASS_P (cond))
11124 return false;
11126 lhs = TREE_OPERAND (cond, 0);
11127 rhs = TREE_OPERAND (cond, 1);
11129 if (TREE_CODE (lhs) == SSA_NAME)
11131 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11132 &lhs, &slp_op, &dts[0], &vectype1))
11133 return false;
11135 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11136 || TREE_CODE (lhs) == FIXED_CST)
11137 dts[0] = vect_constant_def;
11138 else
11139 return false;
11141 if (TREE_CODE (rhs) == SSA_NAME)
11143 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11144 &rhs, &slp_op, &dts[1], &vectype2))
11145 return false;
11147 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11148 || TREE_CODE (rhs) == FIXED_CST)
11149 dts[1] = vect_constant_def;
11150 else
11151 return false;
11153 if (vectype1 && vectype2
11154 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11155 TYPE_VECTOR_SUBPARTS (vectype2)))
11156 return false;
11158 *comp_vectype = vectype1 ? vectype1 : vectype2;
11159 /* Invariant comparison. */
11160 if (! *comp_vectype)
11162 tree scalar_type = TREE_TYPE (lhs);
11163 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11164 *comp_vectype = truth_type_for (vectype);
11165 else
11167 /* If we can widen the comparison to match vectype do so. */
11168 if (INTEGRAL_TYPE_P (scalar_type)
11169 && !slp_node
11170 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11171 TYPE_SIZE (TREE_TYPE (vectype))))
11172 scalar_type = build_nonstandard_integer_type
11173 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11174 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11175 slp_node);
11179 return true;
11182 /* vectorizable_condition.
11184 Check if STMT_INFO is a conditional modify expression that can be vectorized.
11185 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11186 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11187 at GSI.
11189 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11191 Return true if STMT_INFO is vectorizable in this way. */
11193 static bool
11194 vectorizable_condition (vec_info *vinfo,
11195 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11196 gimple **vec_stmt,
11197 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11199 tree scalar_dest = NULL_TREE;
11200 tree vec_dest = NULL_TREE;
11201 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11202 tree then_clause, else_clause;
11203 tree comp_vectype = NULL_TREE;
11204 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11205 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11206 tree vec_compare;
11207 tree new_temp;
11208 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11209 enum vect_def_type dts[4]
11210 = {vect_unknown_def_type, vect_unknown_def_type,
11211 vect_unknown_def_type, vect_unknown_def_type};
11212 int ndts = 4;
11213 int ncopies;
11214 int vec_num;
11215 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11216 int i;
11217 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11218 vec<tree> vec_oprnds0 = vNULL;
11219 vec<tree> vec_oprnds1 = vNULL;
11220 vec<tree> vec_oprnds2 = vNULL;
11221 vec<tree> vec_oprnds3 = vNULL;
11222 tree vec_cmp_type;
11223 bool masked = false;
11225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11226 return false;
11228 /* Is vectorizable conditional operation? */
11229 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11230 if (!stmt)
11231 return false;
11233 code = gimple_assign_rhs_code (stmt);
11234 if (code != COND_EXPR)
11235 return false;
11237 stmt_vec_info reduc_info = NULL;
11238 int reduc_index = -1;
11239 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11240 bool for_reduction
11241 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11242 if (for_reduction)
11244 if (slp_node)
11245 return false;
11246 reduc_info = info_for_reduction (vinfo, stmt_info);
11247 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11248 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11249 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11250 || reduc_index != -1);
11252 else
11254 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11255 return false;
11258 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11259 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11261 if (slp_node)
11263 ncopies = 1;
11264 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11266 else
11268 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11269 vec_num = 1;
11272 gcc_assert (ncopies >= 1);
11273 if (for_reduction && ncopies > 1)
11274 return false; /* FORNOW */
11276 cond_expr = gimple_assign_rhs1 (stmt);
11278 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11279 &comp_vectype, &dts[0], vectype)
11280 || !comp_vectype)
11281 return false;
11283 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11284 slp_tree then_slp_node, else_slp_node;
11285 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11286 &then_clause, &then_slp_node, &dts[2], &vectype1))
11287 return false;
11288 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11289 &else_clause, &else_slp_node, &dts[3], &vectype2))
11290 return false;
11292 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11293 return false;
11295 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11296 return false;
11298 masked = !COMPARISON_CLASS_P (cond_expr);
11299 vec_cmp_type = truth_type_for (comp_vectype);
11301 if (vec_cmp_type == NULL_TREE)
11302 return false;
11304 cond_code = TREE_CODE (cond_expr);
11305 if (!masked)
11307 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11308 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11311 /* For conditional reductions, the "then" value needs to be the candidate
11312 value calculated by this iteration while the "else" value needs to be
11313 the result carried over from previous iterations. If the COND_EXPR
11314 is the other way around, we need to swap it. */
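/* For example, res = _cond ? res_prev : val_this_iter is rewritten as
   res = !_cond ? val_this_iter : res_prev, either by inverting the
   comparison code or, failing that, by negating the mask.  */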
11315 bool must_invert_cmp_result = false;
11316 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11318 if (masked)
11319 must_invert_cmp_result = true;
11320 else
11322 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11323 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11324 if (new_code == ERROR_MARK)
11325 must_invert_cmp_result = true;
11326 else
11328 cond_code = new_code;
11329 /* Make sure we don't accidentally use the old condition. */
11330 cond_expr = NULL_TREE;
11333 std::swap (then_clause, else_clause);
11336 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
11338 /* Boolean values may have another representation in vectors
11339 and therefore we prefer bit operations over comparison for
11340 them (which also works for scalar masks). We store opcodes
11341 to use in bitop1 and bitop2. Statement is vectorized as
11342 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
11343 depending on bitop1 and bitop2 arity. */
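/* E.g. for boolean operands A > B becomes A & ~B, A >= B becomes
   A | ~B and A == B becomes ~(A ^ B); LT and LE are handled by
   swapping the operands.  */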
11344 switch (cond_code)
11346 case GT_EXPR:
11347 bitop1 = BIT_NOT_EXPR;
11348 bitop2 = BIT_AND_EXPR;
11349 break;
11350 case GE_EXPR:
11351 bitop1 = BIT_NOT_EXPR;
11352 bitop2 = BIT_IOR_EXPR;
11353 break;
11354 case LT_EXPR:
11355 bitop1 = BIT_NOT_EXPR;
11356 bitop2 = BIT_AND_EXPR;
11357 std::swap (cond_expr0, cond_expr1);
11358 break;
11359 case LE_EXPR:
11360 bitop1 = BIT_NOT_EXPR;
11361 bitop2 = BIT_IOR_EXPR;
11362 std::swap (cond_expr0, cond_expr1);
11363 break;
11364 case NE_EXPR:
11365 bitop1 = BIT_XOR_EXPR;
11366 break;
11367 case EQ_EXPR:
11368 bitop1 = BIT_XOR_EXPR;
11369 bitop2 = BIT_NOT_EXPR;
11370 break;
11371 default:
11372 return false;
11374 cond_code = SSA_NAME;
11377 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
11378 && reduction_type == EXTRACT_LAST_REDUCTION
11379 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
11381 if (dump_enabled_p ())
11382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11383 "reduction comparison operation not supported.\n");
11384 return false;
11387 if (!vec_stmt)
11389 if (bitop1 != NOP_EXPR)
11391 machine_mode mode = TYPE_MODE (comp_vectype);
11392 optab optab;
11394 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
11395 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11396 return false;
11398 if (bitop2 != NOP_EXPR)
11400 optab = optab_for_tree_code (bitop2, comp_vectype,
11401 optab_default);
11402 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11403 return false;
11407 vect_cost_for_stmt kind = vector_stmt;
11408 if (reduction_type == EXTRACT_LAST_REDUCTION)
11409 /* Count one reduction-like operation per vector. */
11410 kind = vec_to_scalar;
11411 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
11412 && (masked
11413 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
11414 cond_code)
11415 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
11416 ERROR_MARK))))
11417 return false;
11419 if (slp_node
11420 && (!vect_maybe_update_slp_op_vectype
11421 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
11422 || (op_adjust == 1
11423 && !vect_maybe_update_slp_op_vectype
11424 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
11425 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
11426 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
11428 if (dump_enabled_p ())
11429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11430 "incompatible vector types for invariants\n");
11431 return false;
11434 if (loop_vinfo && for_reduction
11435 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
11437 if (reduction_type == EXTRACT_LAST_REDUCTION)
11438 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
11439 ncopies * vec_num, vectype, NULL);
11440 /* Extra inactive lanes should be safe for vect_nested_cycle. */
11441 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
11443 if (dump_enabled_p ())
11444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11445 "conditional reduction prevents the use"
11446 " of partial vectors.\n");
11447 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
11451 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
11452 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
11453 cost_vec, kind);
11454 return true;
11457 /* Transform. */
11459 /* Handle def. */
11460 scalar_dest = gimple_assign_lhs (stmt);
11461 if (reduction_type != EXTRACT_LAST_REDUCTION)
11462 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11464 bool swap_cond_operands = false;
11466 /* See whether another part of the vectorized code applies a loop
11467 mask to the condition, or to its inverse. */
11469 vec_loop_masks *masks = NULL;
11470 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
11472 if (reduction_type == EXTRACT_LAST_REDUCTION)
11473 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11474 else
11476 scalar_cond_masked_key cond (cond_expr, ncopies);
11477 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11478 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11479 else
11481 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
11482 tree_code orig_code = cond.code;
11483 cond.code = invert_tree_comparison (cond.code, honor_nans);
11484 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
11486 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11487 cond_code = cond.code;
11488 swap_cond_operands = true;
11490 else
11492 /* Try the inverse of the current mask. We check if the
11493 inverse mask is live and if so we generate a negate of
11494 the current mask such that we still honor NaNs. */
11495 cond.inverted_p = true;
11496 cond.code = orig_code;
11497 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11499 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11500 cond_code = cond.code;
11501 swap_cond_operands = true;
11502 must_invert_cmp_result = true;
11509 /* Handle cond expr. */
11510 if (masked)
11511 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11512 cond_expr, &vec_oprnds0, comp_vectype,
11513 then_clause, &vec_oprnds2, vectype,
11514 reduction_type != EXTRACT_LAST_REDUCTION
11515 ? else_clause : NULL, &vec_oprnds3, vectype);
11516 else
11517 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11518 cond_expr0, &vec_oprnds0, comp_vectype,
11519 cond_expr1, &vec_oprnds1, comp_vectype,
11520 then_clause, &vec_oprnds2, vectype,
11521 reduction_type != EXTRACT_LAST_REDUCTION
11522 ? else_clause : NULL, &vec_oprnds3, vectype);
11524 /* Arguments are ready. Create the new vector stmt. */
11525 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
11527 vec_then_clause = vec_oprnds2[i];
11528 if (reduction_type != EXTRACT_LAST_REDUCTION)
11529 vec_else_clause = vec_oprnds3[i];
11531 if (swap_cond_operands)
11532 std::swap (vec_then_clause, vec_else_clause);
11534 if (masked)
11535 vec_compare = vec_cond_lhs;
11536 else
11538 vec_cond_rhs = vec_oprnds1[i];
11539 if (bitop1 == NOP_EXPR)
11541 gimple_seq stmts = NULL;
11542 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
11543 vec_cond_lhs, vec_cond_rhs);
11544 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
11546 else
11548 new_temp = make_ssa_name (vec_cmp_type);
11549 gassign *new_stmt;
11550 if (bitop1 == BIT_NOT_EXPR)
11551 new_stmt = gimple_build_assign (new_temp, bitop1,
11552 vec_cond_rhs);
11553 else
11554 new_stmt
11555 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
11556 vec_cond_rhs);
11557 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11558 if (bitop2 == NOP_EXPR)
11559 vec_compare = new_temp;
11560 else if (bitop2 == BIT_NOT_EXPR
11561 && reduction_type != EXTRACT_LAST_REDUCTION)
11563 /* Instead of doing ~x ? y : z do x ? z : y. */
11564 vec_compare = new_temp;
11565 std::swap (vec_then_clause, vec_else_clause);
11567 else
11569 vec_compare = make_ssa_name (vec_cmp_type);
11570 if (bitop2 == BIT_NOT_EXPR)
11571 new_stmt
11572 = gimple_build_assign (vec_compare, bitop2, new_temp);
11573 else
11574 new_stmt
11575 = gimple_build_assign (vec_compare, bitop2,
11576 vec_cond_lhs, new_temp);
11577 vect_finish_stmt_generation (vinfo, stmt_info,
11578 new_stmt, gsi);
11583 /* If we decided to apply a loop mask to the result of the vector
11584 comparison, AND the comparison with the mask now. Later passes
11585 should then be able to reuse the AND results between multiple
11586 vector statements.
11588 For example:
11589 for (int i = 0; i < 100; ++i)
11590 x[i] = y[i] ? z[i] : 10;
11592 results in following optimized GIMPLE:
11594 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11595 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11596 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11597 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11598 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11599 vect_iftmp.11_47, { 10, ... }>;
11601 instead of using masked and unmasked forms of
11602 vec != { 0, ... } (masked in the MASK_LOAD,
11603 unmasked in the VEC_COND_EXPR). */
11605 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11606 in cases where that's necessary. */
11608 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
11610 if (!is_gimple_val (vec_compare))
11612 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11613 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11614 vec_compare);
11615 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11616 vec_compare = vec_compare_name;
11619 if (must_invert_cmp_result)
11621 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11622 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11623 BIT_NOT_EXPR,
11624 vec_compare);
11625 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11626 vec_compare = vec_compare_name;
11629 if (masks)
11631 tree loop_mask
11632 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
11633 vectype, i);
11634 tree tmp2 = make_ssa_name (vec_cmp_type);
11635 gassign *g
11636 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11637 loop_mask);
11638 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11639 vec_compare = tmp2;
11643 gimple *new_stmt;
11644 if (reduction_type == EXTRACT_LAST_REDUCTION)
11646 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11647 tree lhs = gimple_get_lhs (old_stmt);
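/* .FOLD_EXTRACT_LAST (ELSE, MASK, THEN) yields the THEN element in
   the last lane for which MASK is set, or ELSE if no lane is set,
   which implements the extract-last reduction in a single call.  */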
11648 new_stmt = gimple_build_call_internal
11649 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11650 vec_then_clause);
11651 gimple_call_set_lhs (new_stmt, lhs);
11652 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11653 if (old_stmt == gsi_stmt (*gsi))
11654 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11655 else
11657 /* In this case we're moving the definition to later in the
11658 block. That doesn't matter because the only uses of the
11659 lhs are in phi statements. */
11660 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
11661 gsi_remove (&old_gsi, true);
11662 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11665 else
11667 new_temp = make_ssa_name (vec_dest);
11668 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
11669 vec_then_clause, vec_else_clause);
11670 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11672 if (slp_node)
11673 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11674 else
11675 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11678 if (!slp_node)
11679 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11681 vec_oprnds0.release ();
11682 vec_oprnds1.release ();
11683 vec_oprnds2.release ();
11684 vec_oprnds3.release ();
11686 return true;
11689 /* vectorizable_comparison.
11691 Check if STMT_INFO is a comparison expression that can be vectorized.
11692 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11693 comparison, put it in VEC_STMT, and insert it at GSI.
11695 Return true if STMT_INFO is vectorizable in this way. */
11697 static bool
11698 vectorizable_comparison (vec_info *vinfo,
11699 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11700 gimple **vec_stmt,
11701 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11703 tree lhs, rhs1, rhs2;
11704 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11705 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11706 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
11707 tree new_temp;
11708 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11709 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
11710 int ndts = 2;
11711 poly_uint64 nunits;
11712 int ncopies;
11713 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11714 int i;
11715 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11716 vec<tree> vec_oprnds0 = vNULL;
11717 vec<tree> vec_oprnds1 = vNULL;
11718 tree mask_type;
11719 tree mask;
11721 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11722 return false;
11724 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
11725 return false;
11727 mask_type = vectype;
11728 nunits = TYPE_VECTOR_SUBPARTS (vectype);
11730 if (slp_node)
11731 ncopies = 1;
11732 else
11733 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11735 gcc_assert (ncopies >= 1);
11736 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11737 return false;
11739 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11740 if (!stmt)
11741 return false;
11743 code = gimple_assign_rhs_code (stmt);
11745 if (TREE_CODE_CLASS (code) != tcc_comparison)
11746 return false;
11748 slp_tree slp_rhs1, slp_rhs2;
11749 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11750 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11751 return false;
11753 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11754 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11755 return false;
11757 if (vectype1 && vectype2
11758 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11759 TYPE_VECTOR_SUBPARTS (vectype2)))
11760 return false;
11762 vectype = vectype1 ? vectype1 : vectype2;
11764 /* Invariant comparison. */
11765 if (!vectype)
11767 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11768 vectype = mask_type;
11769 else
11770 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11771 slp_node);
11772 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11773 return false;
11775 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11776 return false;
11778 /* Can't compare mask and non-mask types. */
11779 if (vectype1 && vectype2
11780 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
11781 return false;
11783 /* Boolean values may have another representation in vectors
11784 and therefore we prefer bit operations over comparison for
11785 them (which also works for scalar masks). We store opcodes
11786 to use in bitop1 and bitop2. Statement is vectorized as
11787 BITOP2 (rhs1 BITOP1 rhs2) or
11788 rhs1 BITOP2 (BITOP1 rhs2)
11789 depending on bitop1 and bitop2 arity. */
11790 bool swap_p = false;
11791 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11793 if (code == GT_EXPR)
11795 bitop1 = BIT_NOT_EXPR;
11796 bitop2 = BIT_AND_EXPR;
11798 else if (code == GE_EXPR)
11800 bitop1 = BIT_NOT_EXPR;
11801 bitop2 = BIT_IOR_EXPR;
11803 else if (code == LT_EXPR)
11805 bitop1 = BIT_NOT_EXPR;
11806 bitop2 = BIT_AND_EXPR;
11807 swap_p = true;
11809 else if (code == LE_EXPR)
11811 bitop1 = BIT_NOT_EXPR;
11812 bitop2 = BIT_IOR_EXPR;
11813 swap_p = true;
11815 else
11817 bitop1 = BIT_XOR_EXPR;
11818 if (code == EQ_EXPR)
11819 bitop2 = BIT_NOT_EXPR;
11823 if (!vec_stmt)
11825 if (bitop1 == NOP_EXPR)
11827 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
11828 return false;
11830 else
11832 machine_mode mode = TYPE_MODE (vectype);
11833 optab optab;
11835 optab = optab_for_tree_code (bitop1, vectype, optab_default);
11836 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11837 return false;
11839 if (bitop2 != NOP_EXPR)
11841 optab = optab_for_tree_code (bitop2, vectype, optab_default);
11842 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11843 return false;
11847 /* Put types on constant and invariant SLP children. */
11848 if (slp_node
11849 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
11850 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
11852 if (dump_enabled_p ())
11853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11854 "incompatible vector types for invariants\n");
11855 return false;
11858 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
11859 vect_model_simple_cost (vinfo, stmt_info,
11860 ncopies * (1 + (bitop2 != NOP_EXPR)),
11861 dts, ndts, slp_node, cost_vec);
11862 return true;
11865 /* Transform. */
11867 /* Handle def. */
11868 lhs = gimple_assign_lhs (stmt);
11869 mask = vect_create_destination_var (lhs, mask_type);
11871 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11872 rhs1, &vec_oprnds0, vectype,
11873 rhs2, &vec_oprnds1, vectype);
11874 if (swap_p)
11875 std::swap (vec_oprnds0, vec_oprnds1);
11877 /* Arguments are ready. Create the new vector stmt. */
11878 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
11880 gimple *new_stmt;
11881 vec_rhs2 = vec_oprnds1[i];
11883 new_temp = make_ssa_name (mask);
11884 if (bitop1 == NOP_EXPR)
11886 new_stmt = gimple_build_assign (new_temp, code,
11887 vec_rhs1, vec_rhs2);
11888 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11890 else
11892 if (bitop1 == BIT_NOT_EXPR)
11893 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11894 else
11895 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11896 vec_rhs2);
11897 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11898 if (bitop2 != NOP_EXPR)
11900 tree res = make_ssa_name (mask);
11901 if (bitop2 == BIT_NOT_EXPR)
11902 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11903 else
11904 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11905 new_temp);
11906 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11909 if (slp_node)
11910 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11911 else
11912 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11915 if (!slp_node)
11916 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11918 vec_oprnds0.release ();
11919 vec_oprnds1.release ();
11921 return true;
11924 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11925 can handle all live statements in the node. Otherwise return true
11926 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11927 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11929 static bool
11930 can_vectorize_live_stmts (vec_info *vinfo,
11931 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11932 slp_tree slp_node, slp_instance slp_node_instance,
11933 bool vec_stmt_p,
11934 stmt_vector_for_cost *cost_vec)
11936 if (slp_node)
11938 stmt_vec_info slp_stmt_info;
11939 unsigned int i;
11940 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11942 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11943 && !vectorizable_live_operation (vinfo,
11944 slp_stmt_info, gsi, slp_node,
11945 slp_node_instance, i,
11946 vec_stmt_p, cost_vec))
11947 return false;
11950 else if (STMT_VINFO_LIVE_P (stmt_info)
11951 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11952 slp_node, slp_node_instance, -1,
11953 vec_stmt_p, cost_vec))
11954 return false;
11956 return true;
11959 /* Make sure the statement is vectorizable. */
11961 opt_result
11962 vect_analyze_stmt (vec_info *vinfo,
11963 stmt_vec_info stmt_info, bool *need_to_vectorize,
11964 slp_tree node, slp_instance node_instance,
11965 stmt_vector_for_cost *cost_vec)
11967 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11968 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11969 bool ok;
11970 gimple_seq pattern_def_seq;
11972 if (dump_enabled_p ())
11973 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11974 stmt_info->stmt);
11976 if (gimple_has_volatile_ops (stmt_info->stmt))
11977 return opt_result::failure_at (stmt_info->stmt,
11978 "not vectorized:"
11979 " stmt has volatile operands: %G\n",
11980 stmt_info->stmt);
11982 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11983 && node == NULL
11984 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11986 gimple_stmt_iterator si;
11988 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11990 stmt_vec_info pattern_def_stmt_info
11991 = vinfo->lookup_stmt (gsi_stmt (si));
11992 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11993 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11995 /* Analyze def stmt of STMT if it's a pattern stmt. */
11996 if (dump_enabled_p ())
11997 dump_printf_loc (MSG_NOTE, vect_location,
11998 "==> examining pattern def statement: %G",
11999 pattern_def_stmt_info->stmt);
12001 opt_result res
12002 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12003 need_to_vectorize, node, node_instance,
12004 cost_vec);
12005 if (!res)
12006 return res;
12011 /* Skip stmts that do not need to be vectorized. In loops this is expected
12012 to include:
12013 - the COND_EXPR which is the loop exit condition
12014 - any LABEL_EXPRs in the loop
12015 - computations that are used only for array indexing or loop control.
12016 In basic blocks we only analyze statements that are a part of some SLP
12017 instance, therefore, all the statements are relevant.
12019 The pattern statement needs to be analyzed instead of the original
12020 statement if the original statement is not relevant. Otherwise, we
12021 analyze both statements. In basic blocks we are called from some SLP
12022 instance traversal; there we don't analyze pattern stmts instead,
12023 since the pattern stmts will already be part of an SLP instance. */
12025 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12026 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12027 && !STMT_VINFO_LIVE_P (stmt_info))
12029 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12030 && pattern_stmt_info
12031 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12032 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12034 /* Analyze PATTERN_STMT instead of the original stmt. */
12035 stmt_info = pattern_stmt_info;
12036 if (dump_enabled_p ())
12037 dump_printf_loc (MSG_NOTE, vect_location,
12038 "==> examining pattern statement: %G",
12039 stmt_info->stmt);
12041 else
12043 if (dump_enabled_p ())
12044 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12046 return opt_result::success ();
12049 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12050 && node == NULL
12051 && pattern_stmt_info
12052 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12053 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12055 /* Analyze PATTERN_STMT too. */
12056 if (dump_enabled_p ())
12057 dump_printf_loc (MSG_NOTE, vect_location,
12058 "==> examining pattern statement: %G",
12059 pattern_stmt_info->stmt);
12061 opt_result res
12062 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12063 node_instance, cost_vec);
12064 if (!res)
12065 return res;
12068 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12070 case vect_internal_def:
12071 break;
12073 case vect_reduction_def:
12074 case vect_nested_cycle:
12075 gcc_assert (!bb_vinfo
12076 && (relevance == vect_used_in_outer
12077 || relevance == vect_used_in_outer_by_reduction
12078 || relevance == vect_used_by_reduction
12079 || relevance == vect_unused_in_scope
12080 || relevance == vect_used_only_live));
12081 break;
12083 case vect_induction_def:
12084 case vect_first_order_recurrence:
12085 gcc_assert (!bb_vinfo);
12086 break;
12088 case vect_constant_def:
12089 case vect_external_def:
12090 case vect_unknown_def_type:
12091 default:
12092 gcc_unreachable ();
12095 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12096 if (node)
12097 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12099 if (STMT_VINFO_RELEVANT_P (stmt_info))
12101 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12102 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12103 || (call && gimple_call_lhs (call) == NULL_TREE));
12104 *need_to_vectorize = true;
12107 if (PURE_SLP_STMT (stmt_info) && !node)
12109 if (dump_enabled_p ())
12110 dump_printf_loc (MSG_NOTE, vect_location,
12111 "handled only by SLP analysis\n");
12112 return opt_result::success ();
12115 ok = true;
12116 if (!bb_vinfo
12117 && (STMT_VINFO_RELEVANT_P (stmt_info)
12118 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12119 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12120 -mveclibabi= takes preference over library functions with
12121 the simd attribute. */
12122 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12123 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12124 cost_vec)
12125 || vectorizable_conversion (vinfo, stmt_info,
12126 NULL, NULL, node, cost_vec)
12127 || vectorizable_operation (vinfo, stmt_info,
12128 NULL, NULL, node, cost_vec)
12129 || vectorizable_assignment (vinfo, stmt_info,
12130 NULL, NULL, node, cost_vec)
12131 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12132 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12133 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12134 node, node_instance, cost_vec)
12135 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12136 NULL, node, cost_vec)
12137 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12138 || vectorizable_condition (vinfo, stmt_info,
12139 NULL, NULL, node, cost_vec)
12140 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12141 cost_vec)
12142 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12143 stmt_info, NULL, node)
12144 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12145 stmt_info, NULL, node, cost_vec));
12146 else
12148 if (bb_vinfo)
12149 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12150 || vectorizable_simd_clone_call (vinfo, stmt_info,
12151 NULL, NULL, node, cost_vec)
12152 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12153 cost_vec)
12154 || vectorizable_shift (vinfo, stmt_info,
12155 NULL, NULL, node, cost_vec)
12156 || vectorizable_operation (vinfo, stmt_info,
12157 NULL, NULL, node, cost_vec)
12158 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12159 cost_vec)
12160 || vectorizable_load (vinfo, stmt_info,
12161 NULL, NULL, node, cost_vec)
12162 || vectorizable_store (vinfo, stmt_info,
12163 NULL, NULL, node, cost_vec)
12164 || vectorizable_condition (vinfo, stmt_info,
12165 NULL, NULL, node, cost_vec)
12166 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12167 cost_vec)
12168 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12171 if (node)
12172 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12174 if (!ok)
12175 return opt_result::failure_at (stmt_info->stmt,
12176 "not vectorized:"
12177 " relevant stmt not supported: %G",
12178 stmt_info->stmt);
12180 /* Stmts that are (also) "live" (i.e. used outside the loop)
12181 need extra handling, except for vectorizable reductions. */
12182 if (!bb_vinfo
12183 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12184 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12185 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12186 stmt_info, NULL, node, node_instance,
12187 false, cost_vec))
12188 return opt_result::failure_at (stmt_info->stmt,
12189 "not vectorized:"
12190 " live stmt not supported: %G",
12191 stmt_info->stmt);
12193 return opt_result::success ();
12197 /* Function vect_transform_stmt.
12199 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12201 bool
12202 vect_transform_stmt (vec_info *vinfo,
12203 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12204 slp_tree slp_node, slp_instance slp_node_instance)
12206 bool is_store = false;
12207 gimple *vec_stmt = NULL;
12208 bool done;
12210 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12212 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12213 if (slp_node)
12214 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12216 switch (STMT_VINFO_TYPE (stmt_info))
12218 case type_demotion_vec_info_type:
12219 case type_promotion_vec_info_type:
12220 case type_conversion_vec_info_type:
12221 done = vectorizable_conversion (vinfo, stmt_info,
12222 gsi, &vec_stmt, slp_node, NULL);
12223 gcc_assert (done);
12224 break;
12226 case induc_vec_info_type:
12227 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12228 stmt_info, &vec_stmt, slp_node,
12229 NULL);
12230 gcc_assert (done);
12231 break;
12233 case shift_vec_info_type:
12234 done = vectorizable_shift (vinfo, stmt_info,
12235 gsi, &vec_stmt, slp_node, NULL);
12236 gcc_assert (done);
12237 break;
12239 case op_vec_info_type:
12240 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12241 NULL);
12242 gcc_assert (done);
12243 break;
12245 case assignment_vec_info_type:
12246 done = vectorizable_assignment (vinfo, stmt_info,
12247 gsi, &vec_stmt, slp_node, NULL);
12248 gcc_assert (done);
12249 break;
12251 case load_vec_info_type:
12252 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12253 NULL);
12254 gcc_assert (done);
12255 break;
12257 case store_vec_info_type:
12258 done = vectorizable_store (vinfo, stmt_info,
12259 gsi, &vec_stmt, slp_node, NULL);
12260 gcc_assert (done);
12261 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
12263 /* In case of interleaving, the whole chain is vectorized when the
12264 last store in the chain is reached. Store stmts before the last
12265 one are skipped, and their vec_stmt_info shouldn't be freed
12266 meanwhile. */
12267 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
12268 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
12269 is_store = true;
12271 else
12272 is_store = true;
12273 break;
12275 case condition_vec_info_type:
12276 done = vectorizable_condition (vinfo, stmt_info,
12277 gsi, &vec_stmt, slp_node, NULL);
12278 gcc_assert (done);
12279 break;
12281 case comparison_vec_info_type:
12282 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
12283 slp_node, NULL);
12284 gcc_assert (done);
12285 break;
12287 case call_vec_info_type:
12288 done = vectorizable_call (vinfo, stmt_info,
12289 gsi, &vec_stmt, slp_node, NULL);
12290 break;
12292 case call_simd_clone_vec_info_type:
12293 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
12294 slp_node, NULL);
12295 break;
12297 case reduc_vec_info_type:
12298 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12299 gsi, &vec_stmt, slp_node);
12300 gcc_assert (done);
12301 break;
12303 case cycle_phi_info_type:
12304 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
12305 &vec_stmt, slp_node, slp_node_instance);
12306 gcc_assert (done);
12307 break;
12309 case lc_phi_info_type:
12310 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12311 stmt_info, &vec_stmt, slp_node);
12312 gcc_assert (done);
12313 break;
12315 case recurr_info_type:
12316 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12317 stmt_info, &vec_stmt, slp_node, NULL);
12318 gcc_assert (done);
12319 break;
12321 case phi_info_type:
12322 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
12323 gcc_assert (done);
12324 break;
12326 default:
12327 if (!STMT_VINFO_LIVE_P (stmt_info))
12329 if (dump_enabled_p ())
12330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12331 "stmt not supported.\n");
12332 gcc_unreachable ();
12334 done = true;
12337 if (!slp_node && vec_stmt)
12338 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
12340 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
12342 /* Handle stmts whose DEF is used outside the loop-nest that is
12343 being vectorized. */
12344 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
12345 slp_node_instance, true, NULL);
12346 gcc_assert (done);
12349 if (slp_node)
12350 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12352 return is_store;
12356 /* Remove a group of stores (for SLP or interleaving), free their
12357 stmt_vec_info. */
12359 void
12360 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
12362 stmt_vec_info next_stmt_info = first_stmt_info;
12364 while (next_stmt_info)
12366 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
12367 next_stmt_info = vect_orig_stmt (next_stmt_info);
12368 /* Free the attached stmt_vec_info and remove the stmt. */
12369 vinfo->remove_stmt (next_stmt_info);
12370 next_stmt_info = tmp;
12374 /* If NUNITS is nonzero, return a vector type that contains NUNITS
12375 elements of type SCALAR_TYPE, or null if the target doesn't support
12376 such a type.
12378 If NUNITS is zero, return a vector type that contains elements of
12379 type SCALAR_TYPE, choosing whichever vector size the target prefers.
12381 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
12382 for this vectorization region and want to "autodetect" the best choice.
12383 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
12384 and we want the new type to be interoperable with it. PREVAILING_MODE
12385 in this case can be a scalar integer mode or a vector mode; when it
12386 is a vector mode, the function acts like a tree-level version of
12387 related_vector_mode. */
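/* For example, with SCALAR_TYPE int and NUNITS == 4 this returns a
   V4SI-like vector type if the target supports one that is
   interoperable with PREVAILING_MODE, and NULL_TREE otherwise.  */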
12389 tree
12390 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
12391 tree scalar_type, poly_uint64 nunits)
12393 tree orig_scalar_type = scalar_type;
12394 scalar_mode inner_mode;
12395 machine_mode simd_mode;
12396 tree vectype;
12398 if ((!INTEGRAL_TYPE_P (scalar_type)
12399 && !POINTER_TYPE_P (scalar_type)
12400 && !SCALAR_FLOAT_TYPE_P (scalar_type))
12401 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
12402 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
12403 return NULL_TREE;
12405 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
12407 /* Interoperability between modes requires one to be a constant multiple
12408 of the other, so that the number of vectors required for each operation
12409 is a compile-time constant. */
12410 if (prevailing_mode != VOIDmode
12411 && !constant_multiple_p (nunits * nbytes,
12412 GET_MODE_SIZE (prevailing_mode))
12413 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
12414 nunits * nbytes))
12415 return NULL_TREE;
12417 /* For vector types of elements whose mode precision doesn't
12418 match their type's precision, we use an element type of mode
12419 precision. The vectorization routines will have to make sure
12420 they support the proper result truncation/extension.
12421 We also make sure to build vector types with INTEGER_TYPE
12422 component type only. */
12423 if (INTEGRAL_TYPE_P (scalar_type)
12424 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
12425 || TREE_CODE (scalar_type) != INTEGER_TYPE))
12426 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
12427 TYPE_UNSIGNED (scalar_type));
12429 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
12430 When the component mode passes the above test, simply use a type
12431 corresponding to that mode. The theory is that any use that
12432 would cause problems with this will disable vectorization anyway. */
12433 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
12434 && !INTEGRAL_TYPE_P (scalar_type))
12435 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
12437 /* We can't build a vector type of elements with alignment bigger than
12438 their size. */
12439 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
12440 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
12441 TYPE_UNSIGNED (scalar_type));
12443 /* If we fell back to using the mode, fail if there was
12444 no scalar type for it. */
12445 if (scalar_type == NULL_TREE)
12446 return NULL_TREE;
12448 /* If no prevailing mode was supplied, use the mode the target prefers.
12449 Otherwise lookup a vector mode based on the prevailing mode. */
12450 if (prevailing_mode == VOIDmode)
12452 gcc_assert (known_eq (nunits, 0U));
12453 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
12454 if (SCALAR_INT_MODE_P (simd_mode))
12456 /* Traditional behavior is not to take the integer mode
12457 literally, but simply to use it as a way of determining
12458 the vector size. It is up to mode_for_vector to decide
12459 what the TYPE_MODE should be.
12461 Note that nunits == 1 is allowed in order to support single
12462 element vector types. */
12463 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
12464 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12465 return NULL_TREE;
12468 else if (SCALAR_INT_MODE_P (prevailing_mode)
12469 || !related_vector_mode (prevailing_mode,
12470 inner_mode, nunits).exists (&simd_mode))
12472 /* Fall back to using mode_for_vector, mostly in the hope of being
12473 able to use an integer mode. */
12474 if (known_eq (nunits, 0U)
12475 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
12476 return NULL_TREE;
12478 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12479 return NULL_TREE;
12482 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
12484 /* In cases where the mode was chosen by mode_for_vector, check that
12485 the target actually supports the chosen mode, or that it at least
12486 allows the vector mode to be replaced by a like-sized integer. */
12487 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
12488 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
12489 return NULL_TREE;
12491 /* Re-attach the address-space qualifier if we canonicalized the scalar
12492 type. */
12493 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
12494 return build_qualified_type
12495 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
12497 return vectype;
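/* Worked example (illustrative only; the exact modes are target-dependent):
   on a target whose preferred SIMD width is 128 bits,
   get_related_vectype_for_scalar_type (VOIDmode, short_integer_type_node, 0)
   would typically autodetect an 8-lane vector of shorts (V8HImode), while
   passing a prevailing mode of V16QImode together with NUNITS == 4 asks
   related_vector_mode for a 4-lane vector of shorts, so that the result
   stays interoperable with the 16-byte mode already chosen (the 8-byte
   size divides the 16-byte prevailing size). */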
12500 /* Function get_vectype_for_scalar_type.
12502 Returns the vector type corresponding to SCALAR_TYPE as supported
12503 by the target. If GROUP_SIZE is nonzero and we're performing BB
12504 vectorization, make sure that the number of elements in the vector
12505 is no bigger than GROUP_SIZE. */
12507 tree
12508 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
12509 unsigned int group_size)
12511 /* For BB vectorization, we should always have a group size once we've
12512 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12513 are tentative requests during things like early data reference
12514 analysis and pattern recognition. */
12515 if (is_a <bb_vec_info> (vinfo))
12516 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12517 else
12518 group_size = 0;
12520 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12521 scalar_type);
12522 if (vectype && vinfo->vector_mode == VOIDmode)
12523 vinfo->vector_mode = TYPE_MODE (vectype);
12525 /* Register the natural choice of vector type, before the group size
12526 has been applied. */
12527 if (vectype)
12528 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
12530 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
12531 try again with an explicit number of elements. */
12532 if (vectype
12533 && group_size
12534 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
12536 /* Start with the biggest number of units that fits within
12537 GROUP_SIZE and halve it until we find a valid vector type.
12538 Usually either the first attempt will succeed or all will
12539 fail (in the latter case because GROUP_SIZE is too small
12540 for the target), but it's possible that a target could have
12541 a hole between supported vector types.
12543 If GROUP_SIZE is not a power of 2, this has the effect of
12544 trying the largest power of 2 that fits within the group,
12545 even though the group is not a multiple of that vector size.
12546 The BB vectorizer will then try to carve up the group into
12547 smaller pieces. */
12548 unsigned int nunits = 1 << floor_log2 (group_size);
12549 do
12550 {
12551 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12552 scalar_type, nunits);
12553 nunits /= 2;
12554 }
12555 while (nunits > 1 && !vectype);
12558 return vectype;
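/* Sketch of the group-size capping above (modes are target-dependent):
   with 128-bit vectors the natural vectype for "short int" has 8 lanes.
   For a BB SLP group of 4 lanes, 8 >= 4 triggers the retry loop with
   nunits = 1 << floor_log2 (4) = 4, so the function first asks for a
   4-lane vector of shorts and, if the target has no such mode, halves
   NUNITS and tries a 2-lane vector before giving up. */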
12561 /* Return the vector type corresponding to SCALAR_TYPE as supported
12562 by the target. NODE, if nonnull, is the SLP tree node that will
12563 use the returned vector type. */
12565 tree
12566 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
12568 unsigned int group_size = 0;
12569 if (node)
12570 group_size = SLP_TREE_LANES (node);
12571 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12574 /* Function get_mask_type_for_scalar_type.
12576 Returns the mask type corresponding to the result of a comparison
12577 of vectors of the specified SCALAR_TYPE, as supported by the target.
12578 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12579 make sure that the number of elements in the vector is no bigger
12580 than GROUP_SIZE. */
12582 tree
12583 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
12584 unsigned int group_size)
12586 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12588 if (!vectype)
12589 return NULL;
12591 return truth_type_for (vectype);
12594 /* Function get_same_sized_vectype
12596 Returns a vector type for SCALAR_TYPE with the same size as
12597 VECTOR_TYPE, if supported by the target. */
12599 tree
12600 get_same_sized_vectype (tree scalar_type, tree vector_type)
12602 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12603 return truth_type_for (vector_type);
12605 poly_uint64 nunits;
12606 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12607 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12608 return NULL_TREE;
12610 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12611 scalar_type, nunits);
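/* Example of the size computation above, assuming a 16-byte VECTOR_TYPE
   such as "vector(4) float": with a 4-byte "int" SCALAR_TYPE the division
   gives nunits = 16 / 4 = 4, so the function would typically return
   "vector(4) int". Scalar boolean types instead take the truth_type_for
   shortcut at the top. */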
12614 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12615 would not change the chosen vector modes. */
12617 bool
12618 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12620 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12621 i != vinfo->used_vector_modes.end (); ++i)
12622 if (!VECTOR_MODE_P (*i)
12623 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12624 return false;
12625 return true;
12628 /* Function vect_is_simple_use.
12630 Input:
12631 VINFO - the vect info of the loop or basic block that is being vectorized.
12632 OPERAND - operand in the loop or bb.
12633 Output:
12634 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12635 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12636 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12637 the definition could be anywhere in the function
12638 DT - the type of definition
12640 Returns whether a stmt with OPERAND can be vectorized.
12641 For loops, supportable operands are constants, loop invariants, and operands
12642 that are defined by the current iteration of the loop. Unsupportable
12643 operands are those that are defined by a previous iteration of the loop (as
12644 is the case in reduction/induction computations).
12645 For basic blocks, supportable operands are constants and bb invariants.
12646 For now, operands defined outside the basic block are not supported. */
12648 bool
12649 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12650 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12652 if (def_stmt_info_out)
12653 *def_stmt_info_out = NULL;
12654 if (def_stmt_out)
12655 *def_stmt_out = NULL;
12656 *dt = vect_unknown_def_type;
12658 if (dump_enabled_p ())
12660 dump_printf_loc (MSG_NOTE, vect_location,
12661 "vect_is_simple_use: operand ");
12662 if (TREE_CODE (operand) == SSA_NAME
12663 && !SSA_NAME_IS_DEFAULT_DEF (operand))
12664 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
12665 else
12666 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
12669 if (CONSTANT_CLASS_P (operand))
12670 *dt = vect_constant_def;
12671 else if (is_gimple_min_invariant (operand))
12672 *dt = vect_external_def;
12673 else if (TREE_CODE (operand) != SSA_NAME)
12674 *dt = vect_unknown_def_type;
12675 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
12676 *dt = vect_external_def;
12677 else
12679 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
12680 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
12681 if (!stmt_vinfo)
12682 *dt = vect_external_def;
12683 else
12685 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
12686 def_stmt = stmt_vinfo->stmt;
12687 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
12688 if (def_stmt_info_out)
12689 *def_stmt_info_out = stmt_vinfo;
12691 if (def_stmt_out)
12692 *def_stmt_out = def_stmt;
12695 if (dump_enabled_p ())
12697 dump_printf (MSG_NOTE, ", type of def: ");
12698 switch (*dt)
12700 case vect_uninitialized_def:
12701 dump_printf (MSG_NOTE, "uninitialized\n");
12702 break;
12703 case vect_constant_def:
12704 dump_printf (MSG_NOTE, "constant\n");
12705 break;
12706 case vect_external_def:
12707 dump_printf (MSG_NOTE, "external\n");
12708 break;
12709 case vect_internal_def:
12710 dump_printf (MSG_NOTE, "internal\n");
12711 break;
12712 case vect_induction_def:
12713 dump_printf (MSG_NOTE, "induction\n");
12714 break;
12715 case vect_reduction_def:
12716 dump_printf (MSG_NOTE, "reduction\n");
12717 break;
12718 case vect_double_reduction_def:
12719 dump_printf (MSG_NOTE, "double reduction\n");
12720 break;
12721 case vect_nested_cycle:
12722 dump_printf (MSG_NOTE, "nested cycle\n");
12723 break;
12724 case vect_first_order_recurrence:
12725 dump_printf (MSG_NOTE, "first order recurrence\n");
12726 break;
12727 case vect_unknown_def_type:
12728 dump_printf (MSG_NOTE, "unknown\n");
12729 break;
12733 if (*dt == vect_unknown_def_type)
12735 if (dump_enabled_p ())
12736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12737 "Unsupported pattern.\n");
12738 return false;
12741 return true;
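/* Quick reference for the classification above (a sketch, not exhaustive):
   - integer/real constants -> vect_constant_def
   - default defs and other invariants -> vect_external_def
   - SSA names with no stmt_vec_info (defined outside the region being
     vectorized) -> vect_external_def
   - SSA names defined inside the region -> the STMT_VINFO_DEF_TYPE of the
     (pattern-adjusted) defining stmt, e.g. vect_internal_def,
     vect_induction_def or vect_reduction_def. */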
12744 /* Function vect_is_simple_use.
12746 Same as vect_is_simple_use but also determines the vector operand
12747 type of OPERAND and stores it to *VECTYPE. If the definition of
12748 OPERAND is vect_uninitialized_def, vect_constant_def or
12749 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
12750 is responsible for computing the best-suited vector type for the
12751 scalar operand. */
12753 bool
12754 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12755 tree *vectype, stmt_vec_info *def_stmt_info_out,
12756 gimple **def_stmt_out)
12758 stmt_vec_info def_stmt_info;
12759 gimple *def_stmt;
12760 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12761 return false;
12763 if (def_stmt_out)
12764 *def_stmt_out = def_stmt;
12765 if (def_stmt_info_out)
12766 *def_stmt_info_out = def_stmt_info;
12768 /* Now get a vector type if the def is internal, otherwise supply
12769 NULL_TREE and leave it up to the caller to figure out a proper
12770 type for the use stmt. */
12771 if (*dt == vect_internal_def
12772 || *dt == vect_induction_def
12773 || *dt == vect_reduction_def
12774 || *dt == vect_double_reduction_def
12775 || *dt == vect_nested_cycle
12776 || *dt == vect_first_order_recurrence)
12778 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12779 gcc_assert (*vectype != NULL_TREE);
12780 if (dump_enabled_p ())
12781 dump_printf_loc (MSG_NOTE, vect_location,
12782 "vect_is_simple_use: vectype %T\n", *vectype);
12784 else if (*dt == vect_uninitialized_def
12785 || *dt == vect_constant_def
12786 || *dt == vect_external_def)
12787 *vectype = NULL_TREE;
12788 else
12789 gcc_unreachable ();
12791 return true;
12794 /* Function vect_is_simple_use.
12796 Same as vect_is_simple_use but determines the operand by operand
12797 position OPERAND from either STMT or SLP_NODE, filling in *OP
12798 and *SLP_DEF (when SLP_NODE is not NULL). */
12800 bool
12801 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
12802 unsigned operand, tree *op, slp_tree *slp_def,
12803 enum vect_def_type *dt,
12804 tree *vectype, stmt_vec_info *def_stmt_info_out)
12806 if (slp_node)
12808 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
12809 *slp_def = child;
12810 *vectype = SLP_TREE_VECTYPE (child);
12811 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
12813 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
12814 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
12816 else
12818 if (def_stmt_info_out)
12819 *def_stmt_info_out = NULL;
12820 *op = SLP_TREE_SCALAR_OPS (child)[0];
12821 *dt = SLP_TREE_DEF_TYPE (child);
12822 return true;
12825 else
12827 *slp_def = NULL;
12828 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
12830 if (gimple_assign_rhs_code (ass) == COND_EXPR
12831 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
12833 if (operand < 2)
12834 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
12835 else
12836 *op = gimple_op (ass, operand);
12838 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
12839 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
12840 else
12841 *op = gimple_op (ass, operand + 1);
12843 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
12844 *op = gimple_call_arg (call, operand);
12845 else
12846 gcc_unreachable ();
12847 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
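/* Operand numbering used above, shown on hypothetical statements covering
   the common cases only:
     x = a < b ? c : d;  operand 0 -> a, 1 -> b, 2 -> c, 3 -> d
     x = a + b;          operand 0 -> a, 1 -> b
     x = foo (a, b);     operand 0 -> a, 1 -> b (via gimple_call_arg)
   i.e. for a COND_EXPR with an embedded comparison the first two positions
   address the comparison operands; otherwise position N maps to
   gimple_op (ass, N + 1) resp. the N-th call argument. */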
12851 /* If OP is not NULL and is external or constant, update its vector
12852 type with VECTYPE. Returns true if successful or false if not,
12853 for example when conflicting vector types are present. */
12855 bool
12856 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
12858 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
12859 return true;
12860 if (SLP_TREE_VECTYPE (op))
12861 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
12862 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P; those
12863 should be handled by patterns. Allow vect_constant_def for now. */
12864 if (VECTOR_BOOLEAN_TYPE_P (vectype)
12865 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
12866 return false;
12867 SLP_TREE_VECTYPE (op) = vectype;
12868 return true;
12871 /* Function supportable_widening_operation
12873 Check whether an operation represented by the code CODE is a
12874 widening operation that is supported by the target platform in
12875 vector form (i.e., when operating on arguments of type VECTYPE_IN
12876 producing a result of type VECTYPE_OUT).
12878 Widening operations we currently support are NOP (CONVERT), FLOAT,
12879 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
12880 are supported by the target platform either directly (via vector
12881 tree-codes), or via target builtins.
12883 Output:
12884 - CODE1 and CODE2 are codes of vector operations to be used when
12885 vectorizing the operation, if available.
12886 - MULTI_STEP_CVT determines the number of required intermediate steps in
12887 case of multi-step conversion (like char->short->int - in that case
12888 MULTI_STEP_CVT will be 1).
12889 - INTERM_TYPES contains the intermediate type required to perform the
12890 widening operation (short in the above example). */
12892 bool
12893 supportable_widening_operation (vec_info *vinfo,
12894 code_helper code,
12895 stmt_vec_info stmt_info,
12896 tree vectype_out, tree vectype_in,
12897 code_helper *code1,
12898 code_helper *code2,
12899 int *multi_step_cvt,
12900 vec<tree> *interm_types)
12902 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
12903 class loop *vect_loop = NULL;
12904 machine_mode vec_mode;
12905 enum insn_code icode1, icode2;
12906 optab optab1 = unknown_optab, optab2 = unknown_optab;
12907 tree vectype = vectype_in;
12908 tree wide_vectype = vectype_out;
12909 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
12910 int i;
12911 tree prev_type, intermediate_type;
12912 machine_mode intermediate_mode, prev_mode;
12913 optab optab3, optab4;
12915 *multi_step_cvt = 0;
12916 if (loop_info)
12917 vect_loop = LOOP_VINFO_LOOP (loop_info);
12919 switch (code.safe_as_tree_code ())
12921 case MAX_TREE_CODES:
12922 /* Don't set c1 and c2 if code is not a tree_code. */
12923 break;
12925 case WIDEN_MULT_EXPR:
12926 /* The result of a vectorized widening operation usually requires
12927 two vectors (because the widened results do not fit into one vector).
12928 The generated vector results would normally be expected to be
12929 generated in the same order as in the original scalar computation,
12930 i.e. if 8 results are generated in each vector iteration, they are
12931 to be organized as follows:
12932 vect1: [res1,res2,res3,res4],
12933 vect2: [res5,res6,res7,res8].
12935 However, in the special case that the result of the widening
12936 operation is used in a reduction computation only, the order doesn't
12937 matter (because when vectorizing a reduction we change the order of
12938 the computation). Some targets can take advantage of this and
12939 generate more efficient code. For example, targets like Altivec,
12940 that support widen_mult using a sequence of {mult_even,mult_odd}
12941 generate the following vectors:
12942 vect1: [res1,res3,res5,res7],
12943 vect2: [res2,res4,res6,res8].
12945 When vectorizing outer-loops, we execute the inner-loop sequentially
12946 (each vectorized inner-loop iteration contributes to VF outer-loop
12947 iterations in parallel). We therefore don't allow changing the
12948 order of the computation in the inner-loop during outer-loop
12949 vectorization. */
12950 /* TODO: Another case in which order doesn't *really* matter is when we
12951 widen and then contract again, e.g. (short)((int)x * y >> 8).
12952 Normally, pack_trunc performs an even/odd permute, whereas the
12953 repack from an even/odd expansion would be an interleave, which
12954 would be significantly simpler for e.g. AVX2. */
12955 /* In any case, in order to avoid duplicating the code below, recurse
12956 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12957 are properly set up for the caller. If we fail, we'll continue with
12958 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12959 if (vect_loop
12960 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12961 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12962 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12963 stmt_info, vectype_out,
12964 vectype_in, code1,
12965 code2, multi_step_cvt,
12966 interm_types))
12968 /* Elements in a vector with the vect_used_by_reduction property cannot
12969 be reordered if the use chain with this property does not have the
12970 same operation. One such example is s += a * b, where elements
12971 in a and b cannot be reordered. Here we check if the vector defined
12972 by STMT is only directly used in the reduction statement. */
12973 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12974 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12975 if (use_stmt_info
12976 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12977 return true;
12979 c1 = VEC_WIDEN_MULT_LO_EXPR;
12980 c2 = VEC_WIDEN_MULT_HI_EXPR;
12981 break;
12983 case DOT_PROD_EXPR:
12984 c1 = DOT_PROD_EXPR;
12985 c2 = DOT_PROD_EXPR;
12986 break;
12988 case SAD_EXPR:
12989 c1 = SAD_EXPR;
12990 c2 = SAD_EXPR;
12991 break;
12993 case VEC_WIDEN_MULT_EVEN_EXPR:
12994 /* Support the recursion induced just above. */
12995 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12996 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12997 break;
12999 case WIDEN_LSHIFT_EXPR:
13000 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13001 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13002 break;
13004 CASE_CONVERT:
13005 c1 = VEC_UNPACK_LO_EXPR;
13006 c2 = VEC_UNPACK_HI_EXPR;
13007 break;
13009 case FLOAT_EXPR:
13010 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13011 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13012 break;
13014 case FIX_TRUNC_EXPR:
13015 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13016 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13017 break;
13019 default:
13020 gcc_unreachable ();
13023 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13024 std::swap (c1, c2);
13026 if (code == FIX_TRUNC_EXPR)
13028 /* The signedness is determined from the output operand. */
13029 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13030 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13032 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13033 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13034 && VECTOR_BOOLEAN_TYPE_P (vectype)
13035 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13036 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13038 /* If the input and result modes are the same, a different optab
13039 is needed where we pass in the number of units in vectype. */
13040 optab1 = vec_unpacks_sbool_lo_optab;
13041 optab2 = vec_unpacks_sbool_hi_optab;
13044 vec_mode = TYPE_MODE (vectype);
13045 if (widening_fn_p (code))
13047 /* If this is an internal fn then we must check whether the target
13048 supports either a low-high split or an even-odd split. */
13049 internal_fn ifn = as_internal_fn ((combined_fn) code);
13051 internal_fn lo, hi, even, odd;
13052 lookup_hilo_internal_fn (ifn, &lo, &hi);
13053 *code1 = as_combined_fn (lo);
13054 *code2 = as_combined_fn (hi);
13055 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13056 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13058 /* If we don't support low-high, then check for even-odd. */
13059 if (!optab1
13060 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13061 || !optab2
13062 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13064 lookup_evenodd_internal_fn (ifn, &even, &odd);
13065 *code1 = as_combined_fn (even);
13066 *code2 = as_combined_fn (odd);
13067 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13068 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13071 else if (code.is_tree_code ())
13073 if (code == FIX_TRUNC_EXPR)
13076 /* The signedness is determined from the output operand. */
13076 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13077 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13079 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13080 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13081 && VECTOR_BOOLEAN_TYPE_P (vectype)
13082 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13083 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13085 /* If the input and result modes are the same, a different optab
13086 is needed where we pass in the number of units in vectype. */
13087 optab1 = vec_unpacks_sbool_lo_optab;
13088 optab2 = vec_unpacks_sbool_hi_optab;
13090 else
13092 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13093 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13095 *code1 = c1;
13096 *code2 = c2;
13099 if (!optab1 || !optab2)
13100 return false;
13102 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13103 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13104 return false;
13107 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13108 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13110 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13111 return true;
13112 /* For scalar masks we may have different boolean
13113 vector types having the same QImode. Thus we
13114 add an additional check on the number of elements. */
13115 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13116 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13117 return true;
13120 /* Check if it's a multi-step conversion that can be done using intermediate
13121 types. */
13123 prev_type = vectype;
13124 prev_mode = vec_mode;
13126 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13127 return false;
13129 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13130 intermediate steps in the promotion sequence. We try
13131 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13132 not. */
13133 interm_types->create (MAX_INTERM_CVT_STEPS);
13134 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13136 intermediate_mode = insn_data[icode1].operand[0].mode;
13137 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13138 intermediate_type
13139 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13140 else if (VECTOR_MODE_P (intermediate_mode))
13142 tree intermediate_element_type
13143 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13144 TYPE_UNSIGNED (prev_type));
13145 intermediate_type
13146 = build_vector_type_for_mode (intermediate_element_type,
13147 intermediate_mode);
13149 else
13150 intermediate_type
13151 = lang_hooks.types.type_for_mode (intermediate_mode,
13152 TYPE_UNSIGNED (prev_type));
13154 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13155 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13156 && intermediate_mode == prev_mode
13157 && SCALAR_INT_MODE_P (prev_mode))
13159 /* If the input and result modes are the same, a different optab
13160 is needed where we pass in the number of units in vectype. */
13161 optab3 = vec_unpacks_sbool_lo_optab;
13162 optab4 = vec_unpacks_sbool_hi_optab;
13164 else
13166 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13167 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13170 if (!optab3 || !optab4
13171 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13172 || insn_data[icode1].operand[0].mode != intermediate_mode
13173 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13174 || insn_data[icode2].operand[0].mode != intermediate_mode
13175 || ((icode1 = optab_handler (optab3, intermediate_mode))
13176 == CODE_FOR_nothing)
13177 || ((icode2 = optab_handler (optab4, intermediate_mode))
13178 == CODE_FOR_nothing))
13179 break;
13181 interm_types->quick_push (intermediate_type);
13182 (*multi_step_cvt)++;
13184 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13185 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13187 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13188 return true;
13189 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13190 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13191 return true;
13194 prev_type = intermediate_type;
13195 prev_mode = intermediate_mode;
13198 interm_types->release ();
13199 return false;
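/* Example of a multi-step widening this function can report (a sketch;
   actual support is target-dependent): widening a vector of "char" to
   "int" with only VEC_UNPACK_LO/HI_EXPR available goes
   char -> short -> int, so *CODE1/*CODE2 are the unpack lo/hi codes,
   *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the single intermediate
   vector-of-short type, as described in the function comment above. */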
13203 /* Function supportable_narrowing_operation
13205 Check whether an operation represented by the code CODE is a
13206 narrowing operation that is supported by the target platform in
13207 vector form (i.e., when operating on arguments of type VECTYPE_IN
13208 and producing a result of type VECTYPE_OUT).
13210 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13211 and FLOAT. This function checks if these operations are supported by
13212 the target platform directly via vector tree-codes.
13214 Output:
13215 - CODE1 is the code of a vector operation to be used when
13216 vectorizing the operation, if available.
13217 - MULTI_STEP_CVT determines the number of required intermediate steps in
13218 case of multi-step conversion (like int->short->char - in that case
13219 MULTI_STEP_CVT will be 1).
13220 - INTERM_TYPES contains the intermediate type required to perform the
13221 narrowing operation (short in the above example). */
13223 bool
13224 supportable_narrowing_operation (code_helper code,
13225 tree vectype_out, tree vectype_in,
13226 code_helper *code1, int *multi_step_cvt,
13227 vec<tree> *interm_types)
13229 machine_mode vec_mode;
13230 enum insn_code icode1;
13231 optab optab1, interm_optab;
13232 tree vectype = vectype_in;
13233 tree narrow_vectype = vectype_out;
13234 enum tree_code c1;
13235 tree intermediate_type, prev_type;
13236 machine_mode intermediate_mode, prev_mode;
13237 int i;
13238 unsigned HOST_WIDE_INT n_elts;
13239 bool uns;
13241 if (!code.is_tree_code ())
13242 return false;
13244 *multi_step_cvt = 0;
13245 switch ((tree_code) code)
13247 CASE_CONVERT:
13248 c1 = VEC_PACK_TRUNC_EXPR;
13249 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13250 && VECTOR_BOOLEAN_TYPE_P (vectype)
13251 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13252 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13253 && n_elts < BITS_PER_UNIT)
13254 optab1 = vec_pack_sbool_trunc_optab;
13255 else
13256 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13257 break;
13259 case FIX_TRUNC_EXPR:
13260 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13261 /* The signedness is determined from the output operand. */
13262 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13263 break;
13265 case FLOAT_EXPR:
13266 c1 = VEC_PACK_FLOAT_EXPR;
13267 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13268 break;
13270 default:
13271 gcc_unreachable ();
13274 if (!optab1)
13275 return false;
13277 vec_mode = TYPE_MODE (vectype);
13278 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
13279 return false;
13281 *code1 = c1;
13283 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13285 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13286 return true;
13287 /* For scalar masks we may have different boolean
13288 vector types having the same QImode. Thus we
13289 add an additional check on the number of elements. */
13290 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
13291 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13292 return true;
13295 if (code == FLOAT_EXPR)
13296 return false;
13298 /* Check if it's a multi-step conversion that can be done using intermediate
13299 types. */
13300 prev_mode = vec_mode;
13301 prev_type = vectype;
13302 if (code == FIX_TRUNC_EXPR)
13303 uns = TYPE_UNSIGNED (vectype_out);
13304 else
13305 uns = TYPE_UNSIGNED (vectype);
13307 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
13308 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
13309 costly than signed. */
13310 if (code == FIX_TRUNC_EXPR && uns)
13312 enum insn_code icode2;
13314 intermediate_type
13315 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
13316 interm_optab
13317 = optab_for_tree_code (c1, intermediate_type, optab_default);
13318 if (interm_optab != unknown_optab
13319 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
13320 && insn_data[icode1].operand[0].mode
13321 == insn_data[icode2].operand[0].mode)
13323 uns = false;
13324 optab1 = interm_optab;
13325 icode1 = icode2;
13329 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13330 intermediate steps in the narrowing sequence. We try
13331 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
13332 interm_types->create (MAX_INTERM_CVT_STEPS);
13333 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13335 intermediate_mode = insn_data[icode1].operand[0].mode;
13336 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13337 intermediate_type
13338 = vect_double_mask_nunits (prev_type, intermediate_mode);
13339 else
13340 intermediate_type
13341 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
13342 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13343 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13344 && SCALAR_INT_MODE_P (prev_mode)
13345 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
13346 && n_elts < BITS_PER_UNIT)
13347 interm_optab = vec_pack_sbool_trunc_optab;
13348 else
13349 interm_optab
13350 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
13351 optab_default);
13352 if (!interm_optab
13353 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
13354 || insn_data[icode1].operand[0].mode != intermediate_mode
13355 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
13356 == CODE_FOR_nothing))
13357 break;
13359 interm_types->quick_push (intermediate_type);
13360 (*multi_step_cvt)++;
13362 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13364 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13365 return true;
13366 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
13367 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13368 return true;
13371 prev_mode = intermediate_mode;
13372 prev_type = intermediate_type;
13373 optab1 = interm_optab;
13376 interm_types->release ();
13377 return false;
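/* Mirror example for the narrowing case (a sketch; target-dependent):
   truncating a vector of "int" to "char" using VEC_PACK_TRUNC_EXPR goes
   int -> short -> char, so *CODE1 is VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT
   is 1 and *INTERM_TYPES holds the intermediate vector-of-short type,
   matching the example in the function comment above. */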
13380 /* Generate and return a vector mask of MASK_TYPE such that
13381 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
13382 Add the statements to SEQ. */
13384 tree
13385 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
13386 tree end_index, const char *name)
13388 tree cmp_type = TREE_TYPE (start_index);
13389 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
13390 cmp_type, mask_type,
13391 OPTIMIZE_FOR_SPEED));
13392 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
13393 start_index, end_index,
13394 build_zero_cst (mask_type));
13395 tree tmp;
13396 if (name)
13397 tmp = make_temp_ssa_name (mask_type, NULL, name);
13398 else
13399 tmp = make_ssa_name (mask_type);
13400 gimple_call_set_lhs (call, tmp);
13401 gimple_seq_add_stmt (seq, call);
13402 return tmp;
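/* Worked example of the mask produced above, with hypothetical values:
   for a 4-lane MASK_TYPE, START_INDEX = 5 and END_INDEX = 7 the
   IFN_WHILE_ULT result is { 5 < 7, 6 < 7, 7 < 7, 8 < 7 }
   = { true, true, false, false }, i.e. lane I is set iff
   START_INDEX + I < END_INDEX. */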
13405 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
13406 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
13408 tree
13409 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
13410 tree end_index)
13412 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
13413 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
13416 /* Try to compute the vector types required to vectorize STMT_INFO,
13417 returning true on success and false if vectorization isn't possible.
13418 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13419 make sure that the number of elements in the vectors is no bigger
13420 than GROUP_SIZE.
13422 On success:
13424 - Set *STMT_VECTYPE_OUT to:
13425 - NULL_TREE if the statement doesn't need to be vectorized;
13426 - the equivalent of STMT_VINFO_VECTYPE otherwise.
13428 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
13429 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
13430 statement does not help to determine the overall number of units. */
13432 opt_result
13433 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
13434 tree *stmt_vectype_out,
13435 tree *nunits_vectype_out,
13436 unsigned int group_size)
13438 gimple *stmt = stmt_info->stmt;
13440 /* For BB vectorization, we should always have a group size once we've
13441 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13442 are tentative requests during things like early data reference
13443 analysis and pattern recognition. */
13444 if (is_a <bb_vec_info> (vinfo))
13445 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13446 else
13447 group_size = 0;
13449 *stmt_vectype_out = NULL_TREE;
13450 *nunits_vectype_out = NULL_TREE;
13452 if (gimple_get_lhs (stmt) == NULL_TREE
13453 /* MASK_STORE has no lhs, but is ok. */
13454 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
13456 if (is_a <gcall *> (stmt))
13458 /* Ignore calls with no lhs. These must be calls to
13459 #pragma omp simd functions, and what vectorization factor
13460 they really need can't be determined until
13461 vectorizable_simd_clone_call. */
13462 if (dump_enabled_p ())
13463 dump_printf_loc (MSG_NOTE, vect_location,
13464 "defer to SIMD clone analysis.\n");
13465 return opt_result::success ();
13468 return opt_result::failure_at (stmt,
13469 "not vectorized: irregular stmt.%G", stmt);
13472 tree vectype;
13473 tree scalar_type = NULL_TREE;
13474 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
13476 vectype = STMT_VINFO_VECTYPE (stmt_info);
13477 if (dump_enabled_p ())
13478 dump_printf_loc (MSG_NOTE, vect_location,
13479 "precomputed vectype: %T\n", vectype);
13481 else if (vect_use_mask_type_p (stmt_info))
13483 unsigned int precision = stmt_info->mask_precision;
13484 scalar_type = build_nonstandard_integer_type (precision, 1);
13485 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
13486 if (!vectype)
13487 return opt_result::failure_at (stmt, "not vectorized: unsupported"
13488 " data-type %T\n", scalar_type);
13489 if (dump_enabled_p ())
13490 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13492 else
13494 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
13495 scalar_type = TREE_TYPE (DR_REF (dr));
13496 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
13497 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
13498 else
13499 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
13501 if (dump_enabled_p ())
13503 if (group_size)
13504 dump_printf_loc (MSG_NOTE, vect_location,
13505 "get vectype for scalar type (group size %d):"
13506 " %T\n", group_size, scalar_type);
13507 else
13508 dump_printf_loc (MSG_NOTE, vect_location,
13509 "get vectype for scalar type: %T\n", scalar_type);
13511 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13512 if (!vectype)
13513 return opt_result::failure_at (stmt,
13514 "not vectorized:"
13515 " unsupported data-type %T\n",
13516 scalar_type);
13518 if (dump_enabled_p ())
13519 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13522 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
13523 return opt_result::failure_at (stmt,
13524 "not vectorized: vector stmt in loop:%G",
13525 stmt);
13527 *stmt_vectype_out = vectype;
13529 /* Don't try to compute scalar types if the stmt produces a boolean
13530 vector; use the existing vector type instead. */
13531 tree nunits_vectype = vectype;
13532 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13534 /* The number of units is set according to the smallest scalar
13535 type (or the largest vector size, but we only support one
13536 vector size per vectorization). */
13537 scalar_type = vect_get_smallest_scalar_type (stmt_info,
13538 TREE_TYPE (vectype));
13539 if (scalar_type != TREE_TYPE (vectype))
13541 if (dump_enabled_p ())
13542 dump_printf_loc (MSG_NOTE, vect_location,
13543 "get vectype for smallest scalar type: %T\n",
13544 scalar_type);
13545 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
13546 group_size);
13547 if (!nunits_vectype)
13548 return opt_result::failure_at
13549 (stmt, "not vectorized: unsupported data-type %T\n",
13550 scalar_type);
13551 if (dump_enabled_p ())
13552 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
13553 nunits_vectype);
13557 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
13558 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
13559 return opt_result::failure_at (stmt,
13560 "Not vectorized: Incompatible number "
13561 "of vector subparts between %T and %T\n",
13562 nunits_vectype, *stmt_vectype_out);
13564 if (dump_enabled_p ())
13566 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
13567 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
13568 dump_printf (MSG_NOTE, "\n");
13571 *nunits_vectype_out = nunits_vectype;
13572 return opt_result::success ();
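/* Example of the two output types computed above (a sketch assuming
   128-bit vectors): for a widening statement such as
   int_x = (int) short_y, *STMT_VECTYPE_OUT follows the lhs and becomes a
   4-lane vector of int, while the smallest scalar type involved is
   "short", so *NUNITS_VECTYPE_OUT becomes the 8-lane vector of short
   that determines the vectorization factor. */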
13575 /* Generate and return a statement sequence that sets the vector length LEN to:
13577 min_of_start_and_end = min (START_INDEX, END_INDEX);
13578 left_len = END_INDEX - min_of_start_and_end;
13579 rhs = min (left_len, LEN_LIMIT);
13580 LEN = rhs;
13582 Note: the cost of the code generated by this function is modeled
13583 by vect_estimate_min_profitable_iters, so changes here may need
13584 corresponding changes there. */
13586 gimple_seq
13587 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
13589 gimple_seq stmts = NULL;
13590 tree len_type = TREE_TYPE (len);
13591 gcc_assert (TREE_TYPE (start_index) == len_type);
13593 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
13594 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
13595 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
13596 gimple* stmt = gimple_build_assign (len, rhs);
13597 gimple_seq_add_stmt (&stmts, stmt);
13599 return stmts;
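/* Worked example of the sequence above, with hypothetical values:
   START_INDEX = 20, END_INDEX = 23 and LEN_LIMIT = 16 (a full vector)
   give min_of_start_and_end = 20, left_len = 3 and LEN = 3, enabling only
   the 3 remaining iterations; with START_INDEX = 0 and END_INDEX = 100
   the same sequence yields LEN = 16. */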