Add support for SVE gather loads
gcc/tree-vect-stmts.c (official-gcc.git)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100 if (body_cost_vec)
102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103 stmt_info_for_cost si = { count, kind,
104 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 misalign };
106 body_cost_vec->safe_push (si);
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 else
111 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 count, kind, stmt_info, misalign, where);
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
120 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 "vect_array");
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 tree array, unsigned HOST_WIDE_INT n)
133 tree vect_type, vect, vect_name, array_ref;
134 gimple *new_stmt;
136 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137 vect_type = TREE_TYPE (TREE_TYPE (array));
138 vect = vect_create_destination_var (scalar_dest, vect_type);
139 array_ref = build4 (ARRAY_REF, vect_type, array,
140 build_int_cst (size_type_node, n),
141 NULL_TREE, NULL_TREE);
143 new_stmt = gimple_build_assign (vect, array_ref);
144 vect_name = make_ssa_name (vect, new_stmt);
145 gimple_assign_set_lhs (new_stmt, vect_name);
146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
148 return vect_name;
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 tree array, unsigned HOST_WIDE_INT n)
159 tree array_ref;
160 gimple *new_stmt;
162 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
166 new_stmt = gimple_build_assign (array_ref, vect);
167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
172 (and its group). */
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
177 tree mem_ref;
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182 return mem_ref;
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
187 /* Function vect_mark_relevant.
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 enum vect_relevant relevant, bool live_p)
195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198 gimple *pattern_stmt;
200 if (dump_enabled_p ())
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "mark relevant %d, live %d: ", relevant, live_p);
204 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
209 may have their own uses that are not in any pattern; in such cases the
210 stmt itself should be marked. */
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
213 /* This is the last stmt in a sequence that was detected as a
214 pattern that can potentially be vectorized. Don't mark the stmt
215 as relevant/live because it's not going to be vectorized.
216 Instead mark the pattern-stmt that replaces it. */
218 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_info = vinfo_for_stmt (pattern_stmt);
225 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228 stmt = pattern_stmt;
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
255 tree op;
256 gimple *def_stmt;
257 ssa_op_iter iter;
259 if (!is_gimple_assign (stmt))
260 return false;
262 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
264 enum vect_def_type dt = vect_uninitialized_def;
266 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "use not simple.\n");
271 return false;
274 if (dt != vect_external_def && dt != vect_constant_def)
275 return false;
277 return true;
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - control stmts in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
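/* For illustration only (hypothetical loop, not from this file):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- alters memory (vdef), hence relevant
         s = s + b[i];      <-- s is used after the loop, hence live
       }

   Both statements are detected here, while the induction update
   i = i + 1 and the loop exit test are not.  */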
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 enum vect_relevant *relevant, bool *live_p)
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
302 *relevant = vect_unused_in_scope;
303 *live_p = false;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT. Check if USE is
363 used in STMT for anything other than indexing an array. */
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
368 tree operand;
369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
371 /* USE corresponds to some operand in STMT. If there is no data
372 reference in STMT, then any operand that corresponds to USE
373 is not indexing an array. */
374 if (!STMT_VINFO_DATA_REF (stmt_info))
375 return true;
377 STMT has a data_ref. FORNOW this means that it's of one of
378 the following forms:
379 -1- ARRAY_REF = var
380 -2- var = ARRAY_REF
381 (This should have been verified in analyze_data_refs).
383 'var' in the second case corresponds to a def, not a use,
384 so USE cannot correspond to any operands that are not used
385 for array indexing.
387 Therefore, all we need to check is if STMT falls into the
388 first case, and whether var corresponds to USE. */
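/* For illustration, on hypothetical statements (not from this file):

     a[i_1] = x_2;    the stored value x_2 is a non-indexing use,
     y_3 = a[i_1];    whereas i_1 is only used to index the array,

   so this function returns true for x_2 in the first statement and
   false for i_1 in either statement.  */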
390 if (!gimple_assign_copy_p (stmt))
392 if (is_gimple_call (stmt)
393 && gimple_call_internal_p (stmt))
395 internal_fn ifn = gimple_call_internal_fn (stmt);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (stmt, mask_index))
399 return true;
400 if (internal_gather_scatter_fn_p (ifn)
401 && use == gimple_call_arg (stmt, 1))
402 return true;
403 if (ifn == IFN_MASK_STORE
404 && use == gimple_call_arg (stmt, 3))
405 return true;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 /* Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. Unless the PHI is live
585 of course. */
586 else if (gimple_code (stmt) == GIMPLE_PHI
587 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
588 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
589 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
590 == use))
592 if (dump_enabled_p ())
593 dump_printf_loc (MSG_NOTE, vect_location,
594 "induction value on backedge.\n");
595 return true;
599 vect_mark_relevant (worklist, def_stmt, relevant, false);
600 return true;
604 /* Function vect_mark_stmts_to_be_vectorized.
606 Not all stmts in the loop need to be vectorized. For example:
608 for i...
609 for j...
610 1. T0 = i + j
611 2. T1 = a[T0]
613 3. j = j + 1
615 Stmts 1 and 3 do not need to be vectorized, because loop control and
616 addressing of vectorized data-refs are handled differently.
618 This pass detects such stmts. */
620 bool
621 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
624 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
625 unsigned int nbbs = loop->num_nodes;
626 gimple_stmt_iterator si;
627 gimple *stmt;
628 unsigned int i;
629 stmt_vec_info stmt_vinfo;
630 basic_block bb;
631 gimple *phi;
632 bool live_p;
633 enum vect_relevant relevant;
635 if (dump_enabled_p ())
636 dump_printf_loc (MSG_NOTE, vect_location,
637 "=== vect_mark_stmts_to_be_vectorized ===\n");
639 auto_vec<gimple *, 64> worklist;
641 /* 1. Init worklist. */
642 for (i = 0; i < nbbs; i++)
644 bb = bbs[i];
645 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 phi = gsi_stmt (si);
648 if (dump_enabled_p ())
650 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
654 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
655 vect_mark_relevant (&worklist, phi, relevant, live_p);
657 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 stmt = gsi_stmt (si);
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
666 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
667 vect_mark_relevant (&worklist, stmt, relevant, live_p);
671 /* 2. Process_worklist */
672 while (worklist.length () > 0)
674 use_operand_p use_p;
675 ssa_op_iter iter;
677 stmt = worklist.pop ();
678 if (dump_enabled_p ())
680 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
681 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
684 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
685 (DEF_STMT) as relevant/irrelevant according to the relevance property
686 of STMT. */
687 stmt_vinfo = vinfo_for_stmt (stmt);
688 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
691 propagated as is to the DEF_STMTs of its USEs.
693 One exception is when STMT has been identified as defining a reduction
694 variable; in this case we set the relevance to vect_used_by_reduction.
695 This is because we distinguish between two kinds of relevant stmts -
696 those that are used by a reduction computation, and those that are
697 (also) used by a regular computation. This allows us later on to
698 identify stmts that are used solely by a reduction, and therefore the
699 order of the results that they produce does not have to be kept. */
701 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703 case vect_reduction_def:
704 gcc_assert (relevant != vect_unused_in_scope);
705 if (relevant != vect_unused_in_scope
706 && relevant != vect_used_in_scope
707 && relevant != vect_used_by_reduction
708 && relevant != vect_used_only_live)
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
712 "unsupported use of reduction.\n");
713 return false;
715 break;
717 case vect_nested_cycle:
718 if (relevant != vect_unused_in_scope
719 && relevant != vect_used_in_outer_by_reduction
720 && relevant != vect_used_in_outer)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of nested cycle.\n");
726 return false;
728 break;
730 case vect_double_reduction_def:
731 if (relevant != vect_unused_in_scope
732 && relevant != vect_used_by_reduction
733 && relevant != vect_used_only_live)
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "unsupported use of double reduction.\n");
739 return false;
741 break;
743 default:
744 break;
747 if (is_pattern_stmt_p (stmt_vinfo))
749 /* Pattern statements are not inserted into the code, so
750 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
751 have to scan the RHS or function arguments instead. */
752 if (is_gimple_assign (stmt))
754 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
755 tree op = gimple_assign_rhs1 (stmt);
757 i = 1;
758 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
761 relevant, &worklist, false)
762 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
763 relevant, &worklist, false))
764 return false;
765 i = 2;
767 for (; i < gimple_num_ops (stmt); i++)
769 op = gimple_op (stmt, i);
770 if (TREE_CODE (op) == SSA_NAME
771 && !process_use (stmt, op, loop_vinfo, relevant,
772 &worklist, false))
773 return false;
776 else if (is_gimple_call (stmt))
778 for (i = 0; i < gimple_call_num_args (stmt); i++)
780 tree arg = gimple_call_arg (stmt, i);
781 if (!process_use (stmt, arg, loop_vinfo, relevant,
782 &worklist, false))
783 return false;
787 else
788 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790 tree op = USE_FROM_PTR (use_p);
791 if (!process_use (stmt, op, loop_vinfo, relevant,
792 &worklist, false))
793 return false;
796 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 gather_scatter_info gs_info;
799 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
800 gcc_unreachable ();
801 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
802 &worklist, true))
803 return false;
805 } /* while worklist */
807 return true;
811 /* Function vect_model_simple_cost.
813 Models cost for simple operations, i.e. those that only emit ncopies of a
814 single op. Right now, this does not account for multiple insns that could
815 be generated for the single vector op. We will handle that shortly. */
817 void
818 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
819 enum vect_def_type *dt,
820 int ndts,
821 stmt_vector_for_cost *prologue_cost_vec,
822 stmt_vector_for_cost *body_cost_vec)
824 int i;
825 int inside_cost = 0, prologue_cost = 0;
827 /* The SLP costs were already calculated during SLP tree build. */
828 if (PURE_SLP_STMT (stmt_info))
829 return;
831 /* Cost the "broadcast" of a scalar operand into a vector operand.
832 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833 cost model. */
834 for (i = 0; i < ndts; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 stmt_info, 0, vect_prologue);
839 /* Pass the inside-of-loop statements to the target-specific cost model. */
840 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 stmt_info, 0, vect_body);
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location,
845 "vect_model_simple_cost: inside_cost = %d, "
846 "prologue_cost = %d .\n", inside_cost, prologue_cost);
850 /* Model cost for type demotion and promotion operations. PWR is normally
851 zero for single-step promotions and demotions. It will be one if
852 two-step promotion/demotion is required, and so on. Each additional
853 step doubles the number of instructions required. */
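/* As a worked illustration (assuming vect_pow2 (N) == 2**N): a two-step
   promotion (PWR == 1) is costed below as 2 + 4 = 6 vec_promote_demote
   operations, whereas a two-step demotion is costed as 1 + 2 = 3.  */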
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 enum vect_def_type *dt, int pwr)
859 int i, tmp;
860 int inside_cost = 0, prologue_cost = 0;
861 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863 void *target_cost_data;
865 /* The SLP costs were already calculated during SLP tree build. */
866 if (PURE_SLP_STMT (stmt_info))
867 return;
869 if (loop_vinfo)
870 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
871 else
872 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
874 for (i = 0; i < pwr + 1; i++)
876 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
877 (i + 1) : i;
878 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
879 vec_promote_demote, stmt_info, 0,
880 vect_body);
883 /* FORNOW: Assuming maximum 2 args per stmts. */
884 for (i = 0; i < 2; i++)
885 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
886 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
887 stmt_info, 0, vect_prologue);
889 if (dump_enabled_p ())
890 dump_printf_loc (MSG_NOTE, vect_location,
891 "vect_model_promotion_demotion_cost: inside_cost = %d, "
892 "prologue_cost = %d .\n", inside_cost, prologue_cost);
895 /* Function vect_model_store_cost
897 Models cost for stores. In the case of grouped accesses, one access
898 has the overhead of the grouped access attributed to it. */
900 void
901 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
902 vect_memory_access_type memory_access_type,
903 vec_load_store_type vls_type, slp_tree slp_node,
904 stmt_vector_for_cost *prologue_cost_vec,
905 stmt_vector_for_cost *body_cost_vec)
907 unsigned int inside_cost = 0, prologue_cost = 0;
908 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
909 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
910 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
912 if (vls_type == VLS_STORE_INVARIANT)
913 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
914 stmt_info, 0, vect_prologue);
916 /* Grouped stores update all elements in the group at once,
917 so we want the DR for the first statement. */
918 if (!slp_node && grouped_access_p)
920 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
921 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
924 /* True if we should include any once-per-group costs as well as
925 the cost of the statement itself. For SLP we only get called
926 once per group anyhow. */
927 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
929 /* We assume that the cost of a single store-lanes instruction is
930 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
931 access is instead being provided by a permute-and-store operation,
932 include the cost of the permutes. */
933 if (first_stmt_p
934 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
936 /* Uses high and low interleave or shuffle operations for each
937 needed permute. */
938 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
939 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
940 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
941 stmt_info, 0, vect_body);
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_NOTE, vect_location,
945 "vect_model_store_cost: strided group_size = %d .\n",
946 group_size);
949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
950 /* Costs of the stores. */
951 if (memory_access_type == VMAT_ELEMENTWISE
952 || memory_access_type == VMAT_GATHER_SCATTER)
954 /* N scalar stores plus extracting the elements. */
955 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
956 inside_cost += record_stmt_cost (body_cost_vec,
957 ncopies * assumed_nunits,
958 scalar_store, stmt_info, 0, vect_body);
960 else
961 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
963 if (memory_access_type == VMAT_ELEMENTWISE
964 || memory_access_type == VMAT_STRIDED_SLP)
966 /* N scalar stores plus extracting the elements. */
967 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
968 inside_cost += record_stmt_cost (body_cost_vec,
969 ncopies * assumed_nunits,
970 vec_to_scalar, stmt_info, 0, vect_body);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_store_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
980 /* Calculate cost of DR's memory access. */
981 void
982 vect_get_store_cost (struct data_reference *dr, int ncopies,
983 unsigned int *inside_cost,
984 stmt_vector_for_cost *body_cost_vec)
986 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
987 gimple *stmt = DR_STMT (dr);
988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
990 switch (alignment_support_scheme)
992 case dr_aligned:
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 vector_store, stmt_info, 0,
996 vect_body);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: aligned.\n");
1001 break;
1004 case dr_unaligned_supported:
1006 /* Here, we assign an additional cost for the unaligned store. */
1007 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1008 unaligned_store, stmt_info,
1009 DR_MISALIGNMENT (dr), vect_body);
1010 if (dump_enabled_p ())
1011 dump_printf_loc (MSG_NOTE, vect_location,
1012 "vect_model_store_cost: unaligned supported by "
1013 "hardware.\n");
1014 break;
1017 case dr_unaligned_unsupported:
1019 *inside_cost = VECT_MAX_COST;
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1023 "vect_model_store_cost: unsupported access.\n");
1024 break;
1027 default:
1028 gcc_unreachable ();
1033 /* Function vect_model_load_cost
1035 Models cost for loads. In the case of grouped accesses, one access has
1036 the overhead of the grouped access attributed to it. Since unaligned
1037 accesses are supported for loads, we also account for the costs of the
1038 access scheme chosen. */
1040 void
1041 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1042 vect_memory_access_type memory_access_type,
1043 slp_tree slp_node,
1044 stmt_vector_for_cost *prologue_cost_vec,
1045 stmt_vector_for_cost *body_cost_vec)
1047 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1048 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1049 unsigned int inside_cost = 0, prologue_cost = 0;
1050 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1052 /* Grouped loads read all elements in the group at once,
1053 so we want the DR for the first statement. */
1054 if (!slp_node && grouped_access_p)
1056 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1057 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1060 /* True if we should include any once-per-group costs as well as
1061 the cost of the statement itself. For SLP we only get called
1062 once per group anyhow. */
1063 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1065 /* We assume that the cost of a single load-lanes instruction is
1066 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1067 access is instead being provided by a load-and-permute operation,
1068 include the cost of the permutes. */
1069 if (first_stmt_p
1070 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1072 /* Uses even and odd extract operations or shuffle operations
1073 for each needed permute. */
1074 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1075 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1076 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .\n",
1082 group_size);
1085 /* The loads themselves. */
1086 if (memory_access_type == VMAT_ELEMENTWISE
1087 || memory_access_type == VMAT_GATHER_SCATTER)
1089 /* N scalar loads plus gathering them into a vector. */
1090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1091 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1092 inside_cost += record_stmt_cost (body_cost_vec,
1093 ncopies * assumed_nunits,
1094 scalar_load, stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (dr, ncopies, first_stmt_p,
1098 &inside_cost, &prologue_cost,
1099 prologue_cost_vec, body_cost_vec, true);
1100 if (memory_access_type == VMAT_ELEMENTWISE
1101 || memory_access_type == VMAT_STRIDED_SLP)
1102 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_load_cost (struct data_reference *dr, int ncopies,
1115 bool add_realign_cost, unsigned int *inside_cost,
1116 unsigned int *prologue_cost,
1117 stmt_vector_for_cost *prologue_cost_vec,
1118 stmt_vector_for_cost *body_cost_vec,
1119 bool record_prologue_costs)
1121 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1122 gimple *stmt = DR_STMT (dr);
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1125 switch (alignment_support_scheme)
1127 case dr_aligned:
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1130 stmt_info, 0, vect_body);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_load_cost: aligned.\n");
1136 break;
1138 case dr_unaligned_supported:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_load, stmt_info,
1143 DR_MISALIGNMENT (dr), vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned supported by "
1148 "hardware.\n");
1150 break;
1152 case dr_explicit_realign:
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1155 vector_load, stmt_info, 0, vect_body);
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 vec_perm, stmt_info, 0, vect_body);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1161 prologue costs. */
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: explicit realign\n");
1170 break;
1172 case dr_explicit_realign_optimized:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: unaligned software "
1177 "pipelined.\n");
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost && record_prologue_costs)
1188 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1189 vector_stmt, stmt_info,
1190 0, vect_prologue);
1191 if (targetm.vectorize.builtin_mask_for_load)
1192 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1193 vector_stmt, stmt_info,
1194 0, vect_prologue);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1198 stmt_info, 0, vect_body);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: explicit realign optimized"
1205 "\n");
1207 break;
1210 case dr_unaligned_unsupported:
1212 *inside_cost = VECT_MAX_COST;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1216 "vect_model_load_cost: unsupported access.\n");
1217 break;
1220 default:
1221 gcc_unreachable ();
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1228 static void
1229 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1231 if (gsi)
1232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1233 else
1235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1238 if (loop_vinfo)
1240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1241 basic_block new_bb;
1242 edge pe;
1244 if (nested_in_vect_loop_p (loop, stmt))
1245 loop = loop->inner;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1249 gcc_assert (!new_bb);
1251 else
1253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1254 basic_block bb;
1255 gimple_stmt_iterator gsi_bb_start;
1257 gcc_assert (bb_vinfo);
1258 bb = BB_VINFO_BB (bb_vinfo);
1259 gsi_bb_start = gsi_after_labels (bb);
1260 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1272 /* Function vect_init_vector.
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276 vector type a vector with all elements equal to VAL is created first.
1277 Place the initialization at BSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
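/* For example (illustrative): with TYPE == vector(4) int and VAL == 5
   this builds the constant { 5, 5, 5, 5 }, emits an INIT_STMT assigning
   it to a fresh "cst_" SSA name in the loop preheader (or at GSI, if
   that is nonnull), and returns the new SSA name.  */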
1282 tree
1283 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1285 gimple *init_stmt;
1286 tree new_temp;
1288 /* We abuse this function to push something to an SSA name with initial 'val'. */
1289 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1291 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1292 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1294 /* A scalar boolean value should be transformed into
1295 an all-zeros or all-ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type))
1298 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1299 tree false_val = build_zero_cst (TREE_TYPE (type));
1301 if (CONSTANT_CLASS_P (val))
1302 val = integer_zerop (val) ? false_val : true_val;
1303 else
1305 new_temp = make_ssa_name (TREE_TYPE (type));
1306 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1307 val, true_val, false_val);
1308 vect_init_vector_1 (stmt, init_stmt, gsi);
1309 val = new_temp;
1312 else if (CONSTANT_CLASS_P (val))
1313 val = fold_convert (TREE_TYPE (type), val);
1314 else
1316 new_temp = make_ssa_name (TREE_TYPE (type));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1318 init_stmt = gimple_build_assign (new_temp,
1319 fold_build1 (VIEW_CONVERT_EXPR,
1320 TREE_TYPE (type),
1321 val));
1322 else
1323 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1328 val = build_vector_from_val (type, val);
1331 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_temp, val);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 return new_temp;
1337 /* Function vect_get_vec_def_for_operand_1.
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
1342 tree
1343 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1345 tree vec_oprnd;
1346 gimple *vec_stmt;
1347 stmt_vec_info def_stmt_info = NULL;
1349 switch (dt)
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def:
1353 case vect_external_def:
1354 /* Code should use vect_get_vec_def_for_operand. */
1355 gcc_unreachable ();
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def:
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info = vinfo_for_stmt (def_stmt);
1363 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1364 /* Get vectorized pattern statement. */
1365 if (!vec_stmt
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info))
1368 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1370 gcc_assert (vec_stmt);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else if (is_gimple_call (vec_stmt))
1374 vec_oprnd = gimple_call_lhs (vec_stmt);
1375 else
1376 vec_oprnd = gimple_assign_lhs (vec_stmt);
1377 return vec_oprnd;
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def:
1382 case vect_double_reduction_def:
1383 case vect_nested_cycle:
1384 case vect_induction_def:
1386 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info = vinfo_for_stmt (def_stmt);
1390 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1391 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1392 vec_oprnd = PHI_RESULT (vec_stmt);
1393 else
1394 vec_oprnd = gimple_get_lhs (vec_stmt);
1395 return vec_oprnd;
1398 default:
1399 gcc_unreachable ();
1404 /* Function vect_get_vec_def_for_operand.
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
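/* For example (illustrative): when vectorizing x_1 = y_2 + 3 with a
   V4SI vectype, calling this function for the operand 3 builds and
   returns the invariant vector { 3, 3, 3, 3 } via vect_init_vector,
   while calling it for y_2 (defined inside the loop) returns the lhs
   of the vectorized statement recorded for y_2's definition.  */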
1416 tree
1417 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1425 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1441 if (dt == vect_constant_def || dt == vect_external_def)
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1446 if (vectype)
1447 vector_type = vectype;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1. */
1565 void
1566 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
1569 slp_tree slp_node)
1571 if (slp_node)
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
1577 ops.quick_push (op0);
1578 if (op1)
1579 ops.quick_push (op1);
1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
1583 *vec_oprnds0 = vec_defs[0];
1584 if (op1)
1585 *vec_oprnds1 = vec_defs[1];
1587 else
1589 tree vec_oprnd;
1591 vec_oprnds0->create (1);
1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1593 vec_oprnds0->quick_push (vec_oprnd);
1595 if (op1)
1597 vec_oprnds1->create (1);
1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1599 vec_oprnds1->quick_push (vec_oprnd);
1604 /* Helper function called by vect_finish_replace_stmt and
1605 vect_finish_stmt_generation. Set the location of the new
1606 statement and create a stmt_vec_info for it. */
1608 static void
1609 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1612 vec_info *vinfo = stmt_info->vinfo;
1614 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1616 if (dump_enabled_p ())
1618 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1619 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1622 gimple_set_location (vec_stmt, gimple_location (stmt));
1624 /* While EH edges will generally prevent vectorization, stmt might
1625 e.g. be in a must-not-throw region. Ensure newly created stmts
1626 that could throw are part of the same region. */
1627 int lp_nr = lookup_stmt_eh_lp (stmt);
1628 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1629 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1632 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1633 which sets the same scalar result as STMT did. */
1635 void
1636 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1638 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1640 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1641 gsi_replace (&gsi, vec_stmt, false);
1643 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1646 /* Function vect_finish_stmt_generation.
1648 Insert a new stmt. */
1650 void
1651 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1652 gimple_stmt_iterator *gsi)
1654 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1656 if (!gsi_end_p (*gsi)
1657 && gimple_has_mem_ops (vec_stmt))
1659 gimple *at_stmt = gsi_stmt (*gsi);
1660 tree vuse = gimple_vuse (at_stmt);
1661 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1663 tree vdef = gimple_vdef (at_stmt);
1664 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1665 /* If we have an SSA vuse and insert a store, update virtual
1666 SSA form to avoid triggering the renamer. Do so only
1667 if we can easily see all uses - which is what almost always
1668 happens with the way vectorized stmts are inserted. */
1669 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1670 && ((is_gimple_assign (vec_stmt)
1671 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1672 || (is_gimple_call (vec_stmt)
1673 && !(gimple_call_flags (vec_stmt)
1674 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1676 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1677 gimple_set_vdef (vec_stmt, new_vdef);
1678 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1682 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1683 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1686 /* We want to vectorize a call to combined function CFN with function
1687 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1688 as the types of all inputs. Check whether this is possible using
1689 an internal function, returning its code if so or IFN_LAST if not. */
1691 static internal_fn
1692 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1693 tree vectype_out, tree vectype_in)
1695 internal_fn ifn;
1696 if (internal_fn_p (cfn))
1697 ifn = as_internal_fn (cfn);
1698 else
1699 ifn = associated_internal_fn (fndecl);
1700 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1702 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1703 if (info.vectorizable)
1705 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1706 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1707 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1708 OPTIMIZE_FOR_SPEED))
1709 return ifn;
1712 return IFN_LAST;
1716 static tree permute_vec_elements (tree, tree, tree, gimple *,
1717 gimple_stmt_iterator *);
1719 /* Check whether a load or store statement in the loop described by
1720 LOOP_VINFO is possible in a fully-masked loop. This is testing
1721 whether the vectorizer pass has the appropriate support, as well as
1722 whether the target does.
1724 VLS_TYPE says whether the statement is a load or store and VECTYPE
1725 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1726 says how the load or store is going to be implemented and GROUP_SIZE
1727 is the number of load or store statements in the containing group.
1728 If the access is a gather load or scatter store, GS_INFO describes
1729 its arguments.
1731 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1732 supported, otherwise record the required mask types. */
1734 static void
1735 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1736 vec_load_store_type vls_type, int group_size,
1737 vect_memory_access_type memory_access_type,
1738 gather_scatter_info *gs_info)
1740 /* Invariant loads need no special support. */
1741 if (memory_access_type == VMAT_INVARIANT)
1742 return;
1744 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1745 machine_mode vecmode = TYPE_MODE (vectype);
1746 bool is_load = (vls_type == VLS_LOAD);
1747 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1749 if (is_load
1750 ? !vect_load_lanes_supported (vectype, group_size, true)
1751 : !vect_store_lanes_supported (vectype, group_size, true))
1753 if (dump_enabled_p ())
1754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1755 "can't use a fully-masked loop because the"
1756 " target doesn't have an appropriate masked"
1757 " load/store-lanes instruction.\n");
1758 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1759 return;
1761 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1762 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1763 return;
1766 if (memory_access_type == VMAT_GATHER_SCATTER)
1768 gcc_assert (is_load);
1769 tree offset_type = TREE_TYPE (gs_info->offset);
1770 if (!internal_gather_scatter_fn_supported_p (IFN_MASK_GATHER_LOAD,
1771 vectype,
1772 gs_info->memory_type,
1773 TYPE_SIGN (offset_type),
1774 gs_info->scale))
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "can't use a fully-masked loop because the"
1779 " target doesn't have an appropriate masked"
1780 " gather load instruction.\n");
1781 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782 return;
1784 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786 return;
1789 if (memory_access_type != VMAT_CONTIGUOUS
1790 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1792 /* Element X of the data must come from iteration i * VF + X of the
1793 scalar loop. We need more work to support other mappings. */
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 "can't use a fully-masked loop because an access"
1797 " isn't contiguous.\n");
1798 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799 return;
1802 machine_mode mask_mode;
1803 if (!(targetm.vectorize.get_mask_mode
1804 (GET_MODE_NUNITS (vecmode),
1805 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "can't use a fully-masked loop because the target"
1811 " doesn't have the appropriate masked load or"
1812 " store.\n");
1813 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814 return;
1816 /* We might load more scalars than we need for permuting SLP loads.
1817 We checked in get_group_load_store_type that the extra elements
1818 don't leak into a new vector. */
1819 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821 unsigned int nvectors;
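/* NVECTORS is the number of vector accesses (and hence loop masks) needed
for the group: GROUP_SIZE * VF divided by NUNITS, rounded up.  */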
1822 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824 else
1825 gcc_unreachable ();
1828 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1829 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830 that needs to be applied to all loads and stores in a vectorized loop.
1831 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1833 MASK_TYPE is the type of both masks. If new statements are needed,
1834 insert them before GSI. */
1836 static tree
1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838 gimple_stmt_iterator *gsi)
1840 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841 if (!loop_mask)
1842 return vec_mask;
1844 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847 vec_mask, loop_mask);
1848 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849 return and_res;
1852 /* STMT is a non-strided load or store, meaning that it accesses
1853 elements with a known constant step. Return -1 if that step
1854 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1856 static int
1857 compare_step_with_zero (gimple *stmt)
1859 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1860 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1861 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1862 size_zero_node);
1865 /* If the target supports a permute mask that reverses the elements in
1866 a vector of type VECTYPE, return that mask, otherwise return null. */
1868 static tree
1869 perm_mask_for_reverse (tree vectype)
1871 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1873 /* The encoding has a single stepped pattern. */
1874 vec_perm_builder sel (nunits, 1, 3);
1875 for (int i = 0; i < 3; ++i)
1876 sel.quick_push (nunits - 1 - i);
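/* E.g. for NUNITS == 8 the encoded elements are { 7, 6, 5 }, which the
single stepped pattern extends to the full reversal
{ 7, 6, 5, 4, 3, 2, 1, 0 }.  */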
1878 vec_perm_indices indices (sel, 1, nunits);
1879 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1880 return NULL_TREE;
1881 return vect_gen_perm_mask_checked (vectype, indices);
1884 /* STMT is either a masked or unconditional store. Return the value
1885 being stored. */
1887 static tree
1888 vect_get_store_rhs (gimple *stmt)
1890 if (gassign *assign = dyn_cast <gassign *> (stmt))
1892 gcc_assert (gimple_assign_single_p (assign));
1893 return gimple_assign_rhs1 (assign);
1895 if (gcall *call = dyn_cast <gcall *> (stmt))
1897 internal_fn ifn = gimple_call_internal_fn (call);
1898 gcc_assert (ifn == IFN_MASK_STORE);
1899 return gimple_call_arg (stmt, 3);
1901 gcc_unreachable ();
1904 /* A subroutine of get_load_store_type, with a subset of the same
1905 arguments. Handle the case where STMT is part of a grouped load
1906 or store.
1908 For stores, the statements in the group are all consecutive
1909 and there is no gap at the end. For loads, the statements in the
1910 group might not be consecutive; there can be gaps between statements
1911 as well as at the end. */
1913 static bool
1914 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1915 bool masked_p, vec_load_store_type vls_type,
1916 vect_memory_access_type *memory_access_type)
1918 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1919 vec_info *vinfo = stmt_info->vinfo;
1920 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1921 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1922 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1923 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1924 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1925 bool single_element_p = (stmt == first_stmt
1926 && !GROUP_NEXT_ELEMENT (stmt_info));
1927 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1928 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1930 /* True if the vectorized statements would access data beyond that
1931 accessed by the last statement in the group. */
1932 bool overrun_p = false;
1934 /* True if we can cope with such overrun by peeling for gaps, so that
1935 there is at least one final scalar iteration after the vector loop. */
1936 bool can_overrun_p = (!masked_p
1937 && vls_type == VLS_LOAD
1938 && loop_vinfo
1939 && !loop->inner);
1941 /* There can only be a gap at the end of the group if the stride is
1942 known at compile time. */
1943 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1945 /* Stores can't yet have gaps. */
1946 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1948 if (slp)
1950 if (STMT_VINFO_STRIDED_P (stmt_info))
1952 /* Try to use consecutive accesses of GROUP_SIZE elements,
1953 separated by the stride, until we have a complete vector.
1954 Fall back to scalar accesses if that isn't possible. */
1955 if (multiple_p (nunits, group_size))
1956 *memory_access_type = VMAT_STRIDED_SLP;
1957 else
1958 *memory_access_type = VMAT_ELEMENTWISE;
1960 else
1962 overrun_p = loop_vinfo && gap != 0;
1963 if (overrun_p && vls_type != VLS_LOAD)
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "Grouped store with gaps requires"
1967 " non-consecutive accesses\n");
1968 return false;
1970 /* An overrun is fine if the trailing elements are smaller
1971 than the alignment boundary B. Every vector access will
1972 be a multiple of B and so we are guaranteed to access a
1973 non-gap element in the same B-sized block. */
1974 if (overrun_p
1975 && gap < (vect_known_alignment_in_bytes (first_dr)
1976 / vect_get_scalar_dr_size (first_dr)))
1977 overrun_p = false;
1978 if (overrun_p && !can_overrun_p)
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1982 "Peeling for outer loop is not supported\n");
1983 return false;
1985 *memory_access_type = VMAT_CONTIGUOUS;
1988 else
1990 /* We can always handle this case using elementwise accesses,
1991 but see if something more efficient is available. */
1992 *memory_access_type = VMAT_ELEMENTWISE;
1994 /* If there is a gap at the end of the group then these optimizations
1995 would access excess elements in the last iteration. */
1996 bool would_overrun_p = (gap != 0);
1997 /* An overrun is fine if the trailing elements are smaller than the
1998 alignment boundary B. Every vector access will be a multiple of B
1999 and so we are guaranteed to access a non-gap element in the
2000 same B-sized block. */
2001 if (would_overrun_p
2002 && !masked_p
2003 && gap < (vect_known_alignment_in_bytes (first_dr)
2004 / vect_get_scalar_dr_size (first_dr)))
2005 would_overrun_p = false;
2007 if (!STMT_VINFO_STRIDED_P (stmt_info)
2008 && (can_overrun_p || !would_overrun_p)
2009 && compare_step_with_zero (stmt) > 0)
2011 /* First cope with the degenerate case of a single-element
2012 vector. */
2013 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2014 *memory_access_type = VMAT_CONTIGUOUS;
2016 /* Otherwise try using LOAD/STORE_LANES. */
2017 if (*memory_access_type == VMAT_ELEMENTWISE
2018 && (vls_type == VLS_LOAD
2019 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2020 : vect_store_lanes_supported (vectype, group_size,
2021 masked_p)))
2023 *memory_access_type = VMAT_LOAD_STORE_LANES;
2024 overrun_p = would_overrun_p;
2027 /* If that fails, try using permuting loads. */
2028 if (*memory_access_type == VMAT_ELEMENTWISE
2029 && (vls_type == VLS_LOAD
2030 ? vect_grouped_load_supported (vectype, single_element_p,
2031 group_size)
2032 : vect_grouped_store_supported (vectype, group_size)))
2034 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2035 overrun_p = would_overrun_p;
2040 if (vls_type != VLS_LOAD && first_stmt == stmt)
2042 /* STMT is the leader of the group. Check the operands of all the
2043 stmts of the group. */
2044 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2045 while (next_stmt)
2047 tree op = vect_get_store_rhs (next_stmt);
2048 gimple *def_stmt;
2049 enum vect_def_type dt;
2050 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2052 if (dump_enabled_p ())
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2054 "use not simple.\n");
2055 return false;
2057 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2061 if (overrun_p)
2063 gcc_assert (can_overrun_p);
2064 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2066 "Data access with gaps requires scalar "
2067 "epilogue loop\n");
2068 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2071 return true;
2074 /* A subroutine of get_load_store_type, with a subset of the same
2075 arguments. Handle the case where STMT is a load or store that
2076 accesses consecutive elements with a negative step. */
2078 static vect_memory_access_type
2079 get_negative_load_store_type (gimple *stmt, tree vectype,
2080 vec_load_store_type vls_type,
2081 unsigned int ncopies)
2083 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2084 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2085 dr_alignment_support alignment_support_scheme;
2087 if (ncopies > 1)
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "multiple types with negative step.\n");
2092 return VMAT_ELEMENTWISE;
2095 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2096 if (alignment_support_scheme != dr_aligned
2097 && alignment_support_scheme != dr_unaligned_supported)
2099 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2101 "negative step but alignment required.\n");
2102 return VMAT_ELEMENTWISE;
2105 if (vls_type == VLS_STORE_INVARIANT)
2107 if (dump_enabled_p ())
2108 dump_printf_loc (MSG_NOTE, vect_location,
2109 "negative step with invariant source;"
2110 " no permute needed.\n");
2111 return VMAT_CONTIGUOUS_DOWN;
2114 if (!perm_mask_for_reverse (vectype))
2116 if (dump_enabled_p ())
2117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2118 "negative step and reversing not supported.\n");
2119 return VMAT_ELEMENTWISE;
2122 return VMAT_CONTIGUOUS_REVERSE;
2125 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2126 if there is a memory access type that the vectorized form can use,
2127 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2128 or scatters, fill in GS_INFO accordingly.
2130 SLP says whether we're performing SLP rather than loop vectorization.
2131 MASKED_P is true if the statement is conditional on a vectorized mask.
2132 VECTYPE is the vector type that the vectorized statements will use.
2133 NCOPIES is the number of vector statements that will be needed. */
2135 static bool
2136 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2137 vec_load_store_type vls_type, unsigned int ncopies,
2138 vect_memory_access_type *memory_access_type,
2139 gather_scatter_info *gs_info)
2141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2142 vec_info *vinfo = stmt_info->vinfo;
2143 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2144 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2145 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2147 *memory_access_type = VMAT_GATHER_SCATTER;
2148 gimple *def_stmt;
2149 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2150 gcc_unreachable ();
2151 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2152 &gs_info->offset_dt,
2153 &gs_info->offset_vectype))
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2157 "%s index use not simple.\n",
2158 vls_type == VLS_LOAD ? "gather" : "scatter");
2159 return false;
2162 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2164 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2165 memory_access_type))
2166 return false;
2168 else if (STMT_VINFO_STRIDED_P (stmt_info))
2170 gcc_assert (!slp);
2171 *memory_access_type = VMAT_ELEMENTWISE;
2173 else
2175 int cmp = compare_step_with_zero (stmt);
2176 if (cmp < 0)
2177 *memory_access_type = get_negative_load_store_type
2178 (stmt, vectype, vls_type, ncopies);
2179 else if (cmp == 0)
2181 gcc_assert (vls_type == VLS_LOAD);
2182 *memory_access_type = VMAT_INVARIANT;
2184 else
2185 *memory_access_type = VMAT_CONTIGUOUS;
2188 if ((*memory_access_type == VMAT_ELEMENTWISE
2189 || *memory_access_type == VMAT_STRIDED_SLP)
2190 && !nunits.is_constant ())
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2194 "Not using elementwise accesses due to variable "
2195 "vectorization factor.\n");
2196 return false;
2199 /* FIXME: At the moment the cost model seems to underestimate the
2200 cost of using elementwise accesses. This check preserves the
2201 traditional behavior until that can be fixed. */
2202 if (*memory_access_type == VMAT_ELEMENTWISE
2203 && !STMT_VINFO_STRIDED_P (stmt_info)
2204 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2205 && !GROUP_NEXT_ELEMENT (stmt_info)
2206 && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2208 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2210 "not falling back to elementwise accesses\n");
2211 return false;
2213 return true;
2216 /* Return true if boolean argument MASK is suitable for vectorizing
2217 conditional load or store STMT. When returning true, store the
2218 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2220 static bool
2221 vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2223 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2225 if (dump_enabled_p ())
2226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2227 "mask argument is not a boolean.\n");
2228 return false;
2231 if (TREE_CODE (mask) != SSA_NAME)
2233 if (dump_enabled_p ())
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2235 "mask argument is not an SSA name.\n");
2236 return false;
2239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2240 gimple *def_stmt;
2241 enum vect_def_type dt;
2242 tree mask_vectype;
2243 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2244 &mask_vectype))
2246 if (dump_enabled_p ())
2247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2248 "mask use not simple.\n");
2249 return false;
2252 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2253 if (!mask_vectype)
2254 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2256 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2258 if (dump_enabled_p ())
2259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2260 "could not find an appropriate vector mask type.\n");
2261 return false;
2264 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2265 TYPE_VECTOR_SUBPARTS (vectype)))
2267 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2270 "vector mask type ");
2271 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2272 dump_printf (MSG_MISSED_OPTIMIZATION,
2273 " does not match vector data type ");
2274 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2275 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2277 return false;
2280 *mask_vectype_out = mask_vectype;
2281 return true;
2284 /* Return true if stored value RHS is suitable for vectorizing store
2285 statement STMT. When returning true, store the type of the
2286 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2287 store in *VLS_TYPE_OUT. */
2289 static bool
2290 vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2291 vec_load_store_type *vls_type_out)
2293 /* If this is a store from a constant, make sure native_encode_expr
2294 can handle it. */
2295 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2297 if (dump_enabled_p ())
2298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2299 "cannot encode constant as a byte sequence.\n");
2300 return false;
2303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2304 gimple *def_stmt;
2305 enum vect_def_type dt;
2306 tree rhs_vectype;
2307 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2308 &rhs_vectype))
2310 if (dump_enabled_p ())
2311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2312 "use not simple.\n");
2313 return false;
2316 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2317 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 "incompatible vector types.\n");
2322 return false;
2325 *rhs_vectype_out = rhs_vectype;
2326 if (dt == vect_constant_def || dt == vect_external_def)
2327 *vls_type_out = VLS_STORE_INVARIANT;
2328 else
2329 *vls_type_out = VLS_STORE;
2330 return true;
2333 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2334 Note that we support masks with floating-point type, in which case the
2335 floats are interpreted as a bitmask. */
2337 static tree
2338 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2340 if (TREE_CODE (masktype) == INTEGER_TYPE)
2341 return build_int_cst (masktype, -1);
2342 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2344 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2345 mask = build_vector_from_val (masktype, mask);
2346 return vect_init_vector (stmt, mask, masktype, NULL);
2348 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2350 REAL_VALUE_TYPE r;
2351 long tmp[6];
2352 for (int j = 0; j < 6; ++j)
2353 tmp[j] = -1;
2354 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2355 tree mask = build_real (TREE_TYPE (masktype), r);
2356 mask = build_vector_from_val (masktype, mask);
2357 return vect_init_vector (stmt, mask, masktype, NULL);
2359 gcc_unreachable ();
2362 /* Build an all-zero merge value of type VECTYPE while vectorizing
2363 STMT as a gather load. */
2365 static tree
2366 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2368 tree merge;
2369 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2370 merge = build_int_cst (TREE_TYPE (vectype), 0);
2371 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2373 REAL_VALUE_TYPE r;
2374 long tmp[6];
2375 for (int j = 0; j < 6; ++j)
2376 tmp[j] = 0;
2377 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2378 merge = build_real (TREE_TYPE (vectype), r);
2380 else
2381 gcc_unreachable ();
2382 merge = build_vector_from_val (vectype, merge);
2383 return vect_init_vector (stmt, merge, vectype, NULL);
2386 /* Build a gather load call while vectorizing STMT. Insert new instructions
2387 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2388 operation. If the load is conditional, MASK is the unvectorized
2389 condition, otherwise MASK is null. */
2391 static void
2392 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2393 gimple **vec_stmt, gather_scatter_info *gs_info,
2394 tree mask)
2396 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2397 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2398 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2399 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2400 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2401 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2402 edge pe = loop_preheader_edge (loop);
2403 enum { NARROW, NONE, WIDEN } modifier;
2404 poly_uint64 gather_off_nunits
2405 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2407 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2408 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2409 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2410 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2411 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2412 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2413 tree scaletype = TREE_VALUE (arglist);
2414 gcc_checking_assert (types_compatible_p (srctype, rettype)
2415 && (!mask || types_compatible_p (srctype, masktype)));
2417 tree perm_mask = NULL_TREE;
2418 tree mask_perm_mask = NULL_TREE;
2419 if (known_eq (nunits, gather_off_nunits))
2420 modifier = NONE;
2421 else if (known_eq (nunits * 2, gather_off_nunits))
2423 modifier = WIDEN;
2425 /* Currently widening gathers and scatters are only supported for
2426 fixed-length vectors. */
2427 int count = gather_off_nunits.to_constant ();
2428 vec_perm_builder sel (count, count, 1);
2429 for (int i = 0; i < count; ++i)
2430 sel.quick_push (i | (count / 2));
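/* E.g. for COUNT == 8 this builds { 4, 5, 6, 7, 4, 5, 6, 7 }: in the
widening case one offset vector supplies offsets for two data vectors,
so odd copies use this permutation to pick the second half of the
offsets.  */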
2432 vec_perm_indices indices (sel, 1, count);
2433 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2434 indices);
2436 else if (known_eq (nunits, gather_off_nunits * 2))
2438 modifier = NARROW;
2440 /* Currently narrowing gathers and scatters are only supported for
2441 fixed-length vectors. */
2442 int count = nunits.to_constant ();
2443 vec_perm_builder sel (count, count, 1);
2444 sel.quick_grow (count);
2445 for (int i = 0; i < count; ++i)
2446 sel[i] = i < count / 2 ? i : i + count / 2;
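/* E.g. for COUNT == 8 the indices are { 0, 1, 2, 3, 8, 9, 10, 11 }, which
merges the elements produced by two consecutive gather calls into a
single vector of VECTYPE.  */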
2447 vec_perm_indices indices (sel, 2, count);
2448 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2450 ncopies *= 2;
2452 if (mask)
2454 for (int i = 0; i < count; ++i)
2455 sel[i] = i | (count / 2);
2456 indices.new_vector (sel, 2, count);
2457 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2460 else
2461 gcc_unreachable ();
2463 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2464 vectype);
2466 tree ptr = fold_convert (ptrtype, gs_info->base);
2467 if (!is_gimple_min_invariant (ptr))
2469 gimple_seq seq;
2470 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2471 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2472 gcc_assert (!new_bb);
2475 tree scale = build_int_cst (scaletype, gs_info->scale);
2477 tree vec_oprnd0 = NULL_TREE;
2478 tree vec_mask = NULL_TREE;
2479 tree src_op = NULL_TREE;
2480 tree mask_op = NULL_TREE;
2481 tree prev_res = NULL_TREE;
2482 stmt_vec_info prev_stmt_info = NULL;
2484 if (!mask)
2486 src_op = vect_build_zero_merge_argument (stmt, rettype);
2487 mask_op = vect_build_all_ones_mask (stmt, masktype);
2490 for (int j = 0; j < ncopies; ++j)
2492 tree op, var;
2493 gimple *new_stmt;
2494 if (modifier == WIDEN && (j & 1))
2495 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2496 perm_mask, stmt, gsi);
2497 else if (j == 0)
2498 op = vec_oprnd0
2499 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2500 else
2501 op = vec_oprnd0
2502 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2504 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2506 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2507 TYPE_VECTOR_SUBPARTS (idxtype)));
2508 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2509 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2510 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2512 op = var;
2515 if (mask)
2517 if (mask_perm_mask && (j & 1))
2518 mask_op = permute_vec_elements (mask_op, mask_op,
2519 mask_perm_mask, stmt, gsi);
2520 else
2522 if (j == 0)
2523 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2524 else
2526 gimple *def_stmt;
2527 enum vect_def_type dt;
2528 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2529 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2532 mask_op = vec_mask;
2533 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2535 gcc_assert
2536 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2537 TYPE_VECTOR_SUBPARTS (masktype)));
2538 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2539 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2540 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2541 mask_op);
2542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2543 mask_op = var;
2546 src_op = mask_op;
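/* Emit the gather as a call to the target builtin; following its prototype
decomposed above, the arguments are the merge/source value, the base
pointer, the index vector, the mask and the scale.  */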
2549 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2550 mask_op, scale);
2552 if (!useless_type_conversion_p (vectype, rettype))
2554 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2555 TYPE_VECTOR_SUBPARTS (rettype)));
2556 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2557 gimple_call_set_lhs (new_stmt, op);
2558 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2559 var = make_ssa_name (vec_dest);
2560 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2561 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2563 else
2565 var = make_ssa_name (vec_dest, new_stmt);
2566 gimple_call_set_lhs (new_stmt, var);
2569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2571 if (modifier == NARROW)
2573 if ((j & 1) == 0)
2575 prev_res = var;
2576 continue;
2578 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2579 new_stmt = SSA_NAME_DEF_STMT (var);
2582 if (prev_stmt_info == NULL)
2583 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2584 else
2585 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2586 prev_stmt_info = vinfo_for_stmt (new_stmt);
2590 /* Prepare the base and offset in GS_INFO for vectorization.
2591 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2592 to the vectorized offset argument for the first copy of STMT. STMT
2593 is the statement described by GS_INFO and LOOP is the containing loop. */
2595 static void
2596 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2597 gather_scatter_info *gs_info,
2598 tree *dataref_ptr, tree *vec_offset)
2600 gimple_seq stmts = NULL;
2601 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2602 if (stmts != NULL)
2604 basic_block new_bb;
2605 edge pe = loop_preheader_edge (loop);
2606 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2607 gcc_assert (!new_bb);
2609 tree offset_type = TREE_TYPE (gs_info->offset);
2610 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2611 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2612 offset_vectype);
2615 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2617 static bool
2618 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2619 gimple **vec_stmt, slp_tree slp_node,
2620 tree vectype_in, enum vect_def_type *dt)
2622 tree op, vectype;
2623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2624 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2625 unsigned ncopies;
2626 unsigned HOST_WIDE_INT nunits, num_bytes;
2628 op = gimple_call_arg (stmt, 0);
2629 vectype = STMT_VINFO_VECTYPE (stmt_info);
2631 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2632 return false;
2634 /* Multiple types in SLP are handled by creating the appropriate number of
2635 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2636 case of SLP. */
2637 if (slp_node)
2638 ncopies = 1;
2639 else
2640 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2642 gcc_assert (ncopies >= 1);
2644 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2645 if (! char_vectype)
2646 return false;
2648 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2649 return false;
2651 unsigned word_bytes = num_bytes / nunits;
2653 /* The encoding uses one stepped pattern for each byte in the word. */
2654 vec_perm_builder elts (num_bytes, word_bytes, 3);
2655 for (unsigned i = 0; i < 3; ++i)
2656 for (unsigned j = 0; j < word_bytes; ++j)
2657 elts.quick_push ((i + 1) * word_bytes - j - 1);
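/* E.g. for 4-byte elements the encoding is
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, extended as stepped patterns
to cover the whole vector, i.e. the bytes of each element are reversed
in place.  */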
2659 vec_perm_indices indices (elts, 1, num_bytes);
2660 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2661 return false;
2663 if (! vec_stmt)
2665 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2666 if (dump_enabled_p ())
2667 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2668 "\n");
2669 if (! PURE_SLP_STMT (stmt_info))
2671 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2672 1, vector_stmt, stmt_info, 0, vect_prologue);
2673 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2674 ncopies, vec_perm, stmt_info, 0, vect_body);
2676 return true;
2679 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2681 /* Transform. */
2682 vec<tree> vec_oprnds = vNULL;
2683 gimple *new_stmt = NULL;
2684 stmt_vec_info prev_stmt_info = NULL;
2685 for (unsigned j = 0; j < ncopies; j++)
2687 /* Handle uses. */
2688 if (j == 0)
2689 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2690 else
2691 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2693 /* Arguments are ready. Create the new vector stmt. */
2694 unsigned i;
2695 tree vop;
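/* For each vector operand: view it as a vector of bytes, apply the
byte-reversing permutation, then view the result back as VECTYPE.  */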
2696 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2698 tree tem = make_ssa_name (char_vectype);
2699 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2700 char_vectype, vop));
2701 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2702 tree tem2 = make_ssa_name (char_vectype);
2703 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2704 tem, tem, bswap_vconst);
2705 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2706 tem = make_ssa_name (vectype);
2707 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2708 vectype, tem2));
2709 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2710 if (slp_node)
2711 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2714 if (slp_node)
2715 continue;
2717 if (j == 0)
2718 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2719 else
2720 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2722 prev_stmt_info = vinfo_for_stmt (new_stmt);
2725 vec_oprnds.release ();
2726 return true;
2729 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2730 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2731 in a single step. On success, store the binary pack code in
2732 *CONVERT_CODE. */
2734 static bool
2735 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2736 tree_code *convert_code)
2738 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2739 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2740 return false;
2742 tree_code code;
2743 int multi_step_cvt = 0;
2744 auto_vec <tree, 8> interm_types;
2745 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2746 &code, &multi_step_cvt,
2747 &interm_types)
2748 || multi_step_cvt)
2749 return false;
2751 *convert_code = code;
2752 return true;
2755 /* Function vectorizable_call.
2757 Check if GS performs a function call that can be vectorized.
2758 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2759 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2760 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2762 static bool
2763 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2764 slp_tree slp_node)
2766 gcall *stmt;
2767 tree vec_dest;
2768 tree scalar_dest;
2769 tree op, type;
2770 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2771 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2772 tree vectype_out, vectype_in;
2773 poly_uint64 nunits_in;
2774 poly_uint64 nunits_out;
2775 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2776 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2777 vec_info *vinfo = stmt_info->vinfo;
2778 tree fndecl, new_temp, rhs_type;
2779 gimple *def_stmt;
2780 enum vect_def_type dt[3]
2781 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2782 int ndts = 3;
2783 gimple *new_stmt = NULL;
2784 int ncopies, j;
2785 vec<tree> vargs = vNULL;
2786 enum { NARROW, NONE, WIDEN } modifier;
2787 size_t i, nargs;
2788 tree lhs;
2790 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2791 return false;
2793 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2794 && ! vec_stmt)
2795 return false;
2797 /* Is GS a vectorizable call? */
2798 stmt = dyn_cast <gcall *> (gs);
2799 if (!stmt)
2800 return false;
2802 if (gimple_call_internal_p (stmt)
2803 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
2804 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2805 /* Handled by vectorizable_load and vectorizable_store. */
2806 return false;
2808 if (gimple_call_lhs (stmt) == NULL_TREE
2809 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2810 return false;
2812 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2814 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2816 /* Process function arguments. */
2817 rhs_type = NULL_TREE;
2818 vectype_in = NULL_TREE;
2819 nargs = gimple_call_num_args (stmt);
2821 /* Bail out if the function has more than three arguments; we do not have
2822 interesting builtin functions to vectorize with more than two arguments
2823 except for fma. Calls with no arguments are not vectorizable either. */
2824 if (nargs == 0 || nargs > 3)
2825 return false;
2827 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2828 if (gimple_call_internal_p (stmt)
2829 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2831 nargs = 0;
2832 rhs_type = unsigned_type_node;
2835 for (i = 0; i < nargs; i++)
2837 tree opvectype;
2839 op = gimple_call_arg (stmt, i);
2841 /* We can only handle calls with arguments of the same type. */
2842 if (rhs_type
2843 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2847 "argument types differ.\n");
2848 return false;
2850 if (!rhs_type)
2851 rhs_type = TREE_TYPE (op);
2853 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2855 if (dump_enabled_p ())
2856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2857 "use not simple.\n");
2858 return false;
2861 if (!vectype_in)
2862 vectype_in = opvectype;
2863 else if (opvectype
2864 && opvectype != vectype_in)
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2868 "argument vector types differ.\n");
2869 return false;
2872 /* If all arguments are external or constant defs, use a vector type with
2873 the same size as the output vector type. */
2874 if (!vectype_in)
2875 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2876 if (vec_stmt)
2877 gcc_assert (vectype_in);
2878 if (!vectype_in)
2880 if (dump_enabled_p ())
2882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2883 "no vectype for scalar type ");
2884 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2885 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2888 return false;
2891 /* FORNOW */
2892 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2893 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
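/* NARROW means each output vector holds twice as many (narrower) elements
as an input vector; WIDEN is the converse.  Only equal element counts
and these 2:1 ratios are handled for now.  */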
2894 if (known_eq (nunits_in * 2, nunits_out))
2895 modifier = NARROW;
2896 else if (known_eq (nunits_out, nunits_in))
2897 modifier = NONE;
2898 else if (known_eq (nunits_out * 2, nunits_in))
2899 modifier = WIDEN;
2900 else
2901 return false;
2903 /* We only handle functions that do not read or clobber memory. */
2904 if (gimple_vuse (stmt))
2906 if (dump_enabled_p ())
2907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2908 "function reads from or writes to memory.\n");
2909 return false;
2912 /* For now, we only vectorize functions if a target-specific builtin
2913 is available. TODO -- in some cases, it might be profitable to
2914 insert the calls for pieces of the vector, in order to be able
2915 to vectorize other operations in the loop. */
2916 fndecl = NULL_TREE;
2917 internal_fn ifn = IFN_LAST;
2918 combined_fn cfn = gimple_call_combined_fn (stmt);
2919 tree callee = gimple_call_fndecl (stmt);
2921 /* First try using an internal function. */
2922 tree_code convert_code = ERROR_MARK;
2923 if (cfn != CFN_LAST
2924 && (modifier == NONE
2925 || (modifier == NARROW
2926 && simple_integer_narrowing (vectype_out, vectype_in,
2927 &convert_code))))
2928 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2929 vectype_in);
2931 /* If that fails, try asking for a target-specific built-in function. */
2932 if (ifn == IFN_LAST)
2934 if (cfn != CFN_LAST)
2935 fndecl = targetm.vectorize.builtin_vectorized_function
2936 (cfn, vectype_out, vectype_in);
2937 else
2938 fndecl = targetm.vectorize.builtin_md_vectorized_function
2939 (callee, vectype_out, vectype_in);
2942 if (ifn == IFN_LAST && !fndecl)
2944 if (cfn == CFN_GOMP_SIMD_LANE
2945 && !slp_node
2946 && loop_vinfo
2947 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2948 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2949 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2950 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2952 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2953 { 0, 1, 2, ... vf - 1 } vector. */
2954 gcc_assert (nargs == 0);
2956 else if (modifier == NONE
2957 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2958 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2959 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2960 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2961 vectype_in, dt);
2962 else
2964 if (dump_enabled_p ())
2965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2966 "function is not vectorizable.\n");
2967 return false;
2971 if (slp_node)
2972 ncopies = 1;
2973 else if (modifier == NARROW && ifn == IFN_LAST)
2974 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2975 else
2976 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2978 /* Sanity check: make sure that at least one copy of the vectorized stmt
2979 needs to be generated. */
2980 gcc_assert (ncopies >= 1);
2982 if (!vec_stmt) /* transformation not required. */
2984 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2985 if (dump_enabled_p ())
2986 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2987 "\n");
2988 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2989 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2990 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2991 vec_promote_demote, stmt_info, 0, vect_body);
2993 return true;
2996 /* Transform. */
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3001 /* Handle def. */
3002 scalar_dest = gimple_call_lhs (stmt);
3003 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3005 prev_stmt_info = NULL;
3006 if (modifier == NONE || ifn != IFN_LAST)
3008 tree prev_res = NULL_TREE;
3009 for (j = 0; j < ncopies; ++j)
3011 /* Build argument list for the vectorized call. */
3012 if (j == 0)
3013 vargs.create (nargs);
3014 else
3015 vargs.truncate (0);
3017 if (slp_node)
3019 auto_vec<vec<tree> > vec_defs (nargs);
3020 vec<tree> vec_oprnds0;
3022 for (i = 0; i < nargs; i++)
3023 vargs.quick_push (gimple_call_arg (stmt, i));
3024 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3025 vec_oprnds0 = vec_defs[0];
3027 /* Arguments are ready. Create the new vector stmt. */
3028 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3030 size_t k;
3031 for (k = 0; k < nargs; k++)
3033 vec<tree> vec_oprndsk = vec_defs[k];
3034 vargs[k] = vec_oprndsk[i];
3036 if (modifier == NARROW)
3038 tree half_res = make_ssa_name (vectype_in);
3039 gcall *call
3040 = gimple_build_call_internal_vec (ifn, vargs);
3041 gimple_call_set_lhs (call, half_res);
3042 gimple_call_set_nothrow (call, true);
3043 new_stmt = call;
3044 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3045 if ((i & 1) == 0)
3047 prev_res = half_res;
3048 continue;
3050 new_temp = make_ssa_name (vec_dest);
3051 new_stmt = gimple_build_assign (new_temp, convert_code,
3052 prev_res, half_res);
3054 else
3056 gcall *call;
3057 if (ifn != IFN_LAST)
3058 call = gimple_build_call_internal_vec (ifn, vargs);
3059 else
3060 call = gimple_build_call_vec (fndecl, vargs);
3061 new_temp = make_ssa_name (vec_dest, call);
3062 gimple_call_set_lhs (call, new_temp);
3063 gimple_call_set_nothrow (call, true);
3064 new_stmt = call;
3066 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3067 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3070 for (i = 0; i < nargs; i++)
3072 vec<tree> vec_oprndsi = vec_defs[i];
3073 vec_oprndsi.release ();
3075 continue;
3078 for (i = 0; i < nargs; i++)
3080 op = gimple_call_arg (stmt, i);
3081 if (j == 0)
3082 vec_oprnd0
3083 = vect_get_vec_def_for_operand (op, stmt);
3084 else
3086 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3087 vec_oprnd0
3088 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3091 vargs.quick_push (vec_oprnd0);
3094 if (gimple_call_internal_p (stmt)
3095 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
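/* Copy J of the result is simply the constant lane-index vector
{ J * NUNITS_OUT, J * NUNITS_OUT + 1, ... }.  */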
3097 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3098 tree new_var
3099 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3100 gimple *init_stmt = gimple_build_assign (new_var, cst);
3101 vect_init_vector_1 (stmt, init_stmt, NULL);
3102 new_temp = make_ssa_name (vec_dest);
3103 new_stmt = gimple_build_assign (new_temp, new_var);
3105 else if (modifier == NARROW)
3107 tree half_res = make_ssa_name (vectype_in);
3108 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3109 gimple_call_set_lhs (call, half_res);
3110 gimple_call_set_nothrow (call, true);
3111 new_stmt = call;
3112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3113 if ((j & 1) == 0)
3115 prev_res = half_res;
3116 continue;
3118 new_temp = make_ssa_name (vec_dest);
3119 new_stmt = gimple_build_assign (new_temp, convert_code,
3120 prev_res, half_res);
3122 else
3124 gcall *call;
3125 if (ifn != IFN_LAST)
3126 call = gimple_build_call_internal_vec (ifn, vargs);
3127 else
3128 call = gimple_build_call_vec (fndecl, vargs);
3129 new_temp = make_ssa_name (vec_dest, new_stmt);
3130 gimple_call_set_lhs (call, new_temp);
3131 gimple_call_set_nothrow (call, true);
3132 new_stmt = call;
3134 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3136 if (j == (modifier == NARROW ? 1 : 0))
3137 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3138 else
3139 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3141 prev_stmt_info = vinfo_for_stmt (new_stmt);
3144 else if (modifier == NARROW)
3146 for (j = 0; j < ncopies; ++j)
3148 /* Build argument list for the vectorized call. */
3149 if (j == 0)
3150 vargs.create (nargs * 2);
3151 else
3152 vargs.truncate (0);
3154 if (slp_node)
3156 auto_vec<vec<tree> > vec_defs (nargs);
3157 vec<tree> vec_oprnds0;
3159 for (i = 0; i < nargs; i++)
3160 vargs.quick_push (gimple_call_arg (stmt, i));
3161 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3162 vec_oprnds0 = vec_defs[0];
3164 /* Arguments are ready. Create the new vector stmt. */
3165 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3167 size_t k;
3168 vargs.truncate (0);
3169 for (k = 0; k < nargs; k++)
3171 vec<tree> vec_oprndsk = vec_defs[k];
3172 vargs.quick_push (vec_oprndsk[i]);
3173 vargs.quick_push (vec_oprndsk[i + 1]);
3175 gcall *call;
3176 if (ifn != IFN_LAST)
3177 call = gimple_build_call_internal_vec (ifn, vargs);
3178 else
3179 call = gimple_build_call_vec (fndecl, vargs);
3180 new_temp = make_ssa_name (vec_dest, call);
3181 gimple_call_set_lhs (call, new_temp);
3182 gimple_call_set_nothrow (call, true);
3183 new_stmt = call;
3184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3185 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3188 for (i = 0; i < nargs; i++)
3190 vec<tree> vec_oprndsi = vec_defs[i];
3191 vec_oprndsi.release ();
3193 continue;
3196 for (i = 0; i < nargs; i++)
3198 op = gimple_call_arg (stmt, i);
3199 if (j == 0)
3201 vec_oprnd0
3202 = vect_get_vec_def_for_operand (op, stmt);
3203 vec_oprnd1
3204 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3206 else
3208 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3209 vec_oprnd0
3210 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3211 vec_oprnd1
3212 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3215 vargs.quick_push (vec_oprnd0);
3216 vargs.quick_push (vec_oprnd1);
3219 new_stmt = gimple_build_call_vec (fndecl, vargs);
3220 new_temp = make_ssa_name (vec_dest, new_stmt);
3221 gimple_call_set_lhs (new_stmt, new_temp);
3222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3224 if (j == 0)
3225 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3226 else
3227 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3229 prev_stmt_info = vinfo_for_stmt (new_stmt);
3232 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3234 else
3235 /* No current target implements this case. */
3236 return false;
3238 vargs.release ();
3240 /* The call in STMT might prevent it from being removed in DCE.
3241 We however cannot remove it here, due to the way the SSA name
3242 it defines is mapped to the new definition. So just replace the
3243 rhs of the statement with something harmless. */
3245 if (slp_node)
3246 return true;
3248 type = TREE_TYPE (scalar_dest);
3249 if (is_pattern_stmt_p (stmt_info))
3250 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3251 else
3252 lhs = gimple_call_lhs (stmt);
3254 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3255 set_vinfo_for_stmt (new_stmt, stmt_info);
3256 set_vinfo_for_stmt (stmt, NULL);
3257 STMT_VINFO_STMT (stmt_info) = new_stmt;
3258 gsi_replace (gsi, new_stmt, false);
3260 return true;
3264 struct simd_call_arg_info
3266 tree vectype;
3267 tree op;
3268 HOST_WIDE_INT linear_step;
3269 enum vect_def_type dt;
3270 unsigned int align;
3271 bool simd_lane_linear;
3274 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3275 is linear within a simd lane (but not within the whole loop), note it
3276 in *ARGINFO. */
3278 static void
3279 vect_simd_lane_linear (tree op, struct loop *loop,
3280 struct simd_call_arg_info *arginfo)
3282 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3284 if (!is_gimple_assign (def_stmt)
3285 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3286 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3287 return;
3289 tree base = gimple_assign_rhs1 (def_stmt);
3290 HOST_WIDE_INT linear_step = 0;
3291 tree v = gimple_assign_rhs2 (def_stmt);
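/* Walk V looking for (a converted copy of) the IFN_GOMP_SIMD_LANE lane
number, allowing constant additions (which are folded into BASE) and at
most one multiplication by a constant step along the way.  */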
3292 while (TREE_CODE (v) == SSA_NAME)
3294 tree t;
3295 def_stmt = SSA_NAME_DEF_STMT (v);
3296 if (is_gimple_assign (def_stmt))
3297 switch (gimple_assign_rhs_code (def_stmt))
3299 case PLUS_EXPR:
3300 t = gimple_assign_rhs2 (def_stmt);
3301 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3302 return;
3303 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3304 v = gimple_assign_rhs1 (def_stmt);
3305 continue;
3306 case MULT_EXPR:
3307 t = gimple_assign_rhs2 (def_stmt);
3308 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3309 return;
3310 linear_step = tree_to_shwi (t);
3311 v = gimple_assign_rhs1 (def_stmt);
3312 continue;
3313 CASE_CONVERT:
3314 t = gimple_assign_rhs1 (def_stmt);
3315 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3316 || (TYPE_PRECISION (TREE_TYPE (v))
3317 < TYPE_PRECISION (TREE_TYPE (t))))
3318 return;
3319 if (!linear_step)
3320 linear_step = 1;
3321 v = t;
3322 continue;
3323 default:
3324 return;
3326 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3327 && loop->simduid
3328 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3329 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3330 == loop->simduid))
3332 if (!linear_step)
3333 linear_step = 1;
3334 arginfo->linear_step = linear_step;
3335 arginfo->op = base;
3336 arginfo->simd_lane_linear = true;
3337 return;
3342 /* Return the number of elements in vector type VECTYPE, which is associated
3343 with a SIMD clone. At present these vectors always have a constant
3344 length. */
3346 static unsigned HOST_WIDE_INT
3347 simd_clone_subparts (tree vectype)
3349 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3352 /* Function vectorizable_simd_clone_call.
3354 Check if STMT performs a function call that can be vectorized
3355 by calling a simd clone of the function.
3356 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3357 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3358 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3360 static bool
3361 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3362 gimple **vec_stmt, slp_tree slp_node)
3364 tree vec_dest;
3365 tree scalar_dest;
3366 tree op, type;
3367 tree vec_oprnd0 = NULL_TREE;
3368 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3369 tree vectype;
3370 unsigned int nunits;
3371 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3372 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3373 vec_info *vinfo = stmt_info->vinfo;
3374 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3375 tree fndecl, new_temp;
3376 gimple *def_stmt;
3377 gimple *new_stmt = NULL;
3378 int ncopies, j;
3379 auto_vec<simd_call_arg_info> arginfo;
3380 vec<tree> vargs = vNULL;
3381 size_t i, nargs;
3382 tree lhs, rtype, ratype;
3383 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3385 /* Is STMT a vectorizable call? */
3386 if (!is_gimple_call (stmt))
3387 return false;
3389 fndecl = gimple_call_fndecl (stmt);
3390 if (fndecl == NULL_TREE)
3391 return false;
3393 struct cgraph_node *node = cgraph_node::get (fndecl);
3394 if (node == NULL || node->simd_clones == NULL)
3395 return false;
3397 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3398 return false;
3400 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3401 && ! vec_stmt)
3402 return false;
3404 if (gimple_call_lhs (stmt)
3405 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3406 return false;
3408 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3410 vectype = STMT_VINFO_VECTYPE (stmt_info);
3412 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3413 return false;
3415 /* FORNOW */
3416 if (slp_node)
3417 return false;
3419 /* Process function arguments. */
3420 nargs = gimple_call_num_args (stmt);
3422 /* Bail out if the function has zero arguments. */
3423 if (nargs == 0)
3424 return false;
3426 arginfo.reserve (nargs, true);
3428 for (i = 0; i < nargs; i++)
3430 simd_call_arg_info thisarginfo;
3431 affine_iv iv;
3433 thisarginfo.linear_step = 0;
3434 thisarginfo.align = 0;
3435 thisarginfo.op = NULL_TREE;
3436 thisarginfo.simd_lane_linear = false;
3438 op = gimple_call_arg (stmt, i);
3439 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3440 &thisarginfo.vectype)
3441 || thisarginfo.dt == vect_uninitialized_def)
3443 if (dump_enabled_p ())
3444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3445 "use not simple.\n");
3446 return false;
3449 if (thisarginfo.dt == vect_constant_def
3450 || thisarginfo.dt == vect_external_def)
3451 gcc_assert (thisarginfo.vectype == NULL_TREE);
3452 else
3453 gcc_assert (thisarginfo.vectype != NULL_TREE);
3455 /* For linear arguments, the analyze phase should have saved
3456 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3457 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3458 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3460 gcc_assert (vec_stmt);
3461 thisarginfo.linear_step
3462 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3463 thisarginfo.op
3464 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3465 thisarginfo.simd_lane_linear
3466 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3467 == boolean_true_node);
3468 /* If loop has been peeled for alignment, we need to adjust it. */
3469 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3470 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3471 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3473 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3474 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3475 tree opt = TREE_TYPE (thisarginfo.op);
3476 bias = fold_convert (TREE_TYPE (step), bias);
3477 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3478 thisarginfo.op
3479 = fold_build2 (POINTER_TYPE_P (opt)
3480 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3481 thisarginfo.op, bias);
3484 else if (!vec_stmt
3485 && thisarginfo.dt != vect_constant_def
3486 && thisarginfo.dt != vect_external_def
3487 && loop_vinfo
3488 && TREE_CODE (op) == SSA_NAME
3489 && simple_iv (loop, loop_containing_stmt (stmt), op,
3490 &iv, false)
3491 && tree_fits_shwi_p (iv.step))
3493 thisarginfo.linear_step = tree_to_shwi (iv.step);
3494 thisarginfo.op = iv.base;
3496 else if ((thisarginfo.dt == vect_constant_def
3497 || thisarginfo.dt == vect_external_def)
3498 && POINTER_TYPE_P (TREE_TYPE (op)))
3499 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3500 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3501 linear too. */
3502 if (POINTER_TYPE_P (TREE_TYPE (op))
3503 && !thisarginfo.linear_step
3504 && !vec_stmt
3505 && thisarginfo.dt != vect_constant_def
3506 && thisarginfo.dt != vect_external_def
3507 && loop_vinfo
3508 && !slp_node
3509 && TREE_CODE (op) == SSA_NAME)
3510 vect_simd_lane_linear (op, loop, &thisarginfo);
3512 arginfo.quick_push (thisarginfo);
3515 unsigned HOST_WIDE_INT vf;
3516 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3518 if (dump_enabled_p ())
3519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3520 "not considering SIMD clones; not yet supported"
3521 " for variable-width vectors.\n");
3522 return false;
3525 unsigned int badness = 0;
3526 struct cgraph_node *bestn = NULL;
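/* If the analysis phase already recorded a clone, reuse it.  Otherwise
search NODE's clones for the one with the lowest badness, preferring
clones whose simdlen matches the vectorization factor and that the
target considers usable, and skipping clones whose arguments cannot be
matched up with ARGINFO.  */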
3527 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3528 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3529 else
3530 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3531 n = n->simdclone->next_clone)
3533 unsigned int this_badness = 0;
3534 if (n->simdclone->simdlen > vf
3535 || n->simdclone->nargs != nargs)
3536 continue;
3537 if (n->simdclone->simdlen < vf)
3538 this_badness += (exact_log2 (vf)
3539 - exact_log2 (n->simdclone->simdlen)) * 1024;
3540 if (n->simdclone->inbranch)
3541 this_badness += 2048;
3542 int target_badness = targetm.simd_clone.usable (n);
3543 if (target_badness < 0)
3544 continue;
3545 this_badness += target_badness * 512;
3546 /* FORNOW: Code to pass the mask argument still needs to be added. */
3547 if (n->simdclone->inbranch)
3548 continue;
3549 for (i = 0; i < nargs; i++)
3551 switch (n->simdclone->args[i].arg_type)
3553 case SIMD_CLONE_ARG_TYPE_VECTOR:
3554 if (!useless_type_conversion_p
3555 (n->simdclone->args[i].orig_type,
3556 TREE_TYPE (gimple_call_arg (stmt, i))))
3557 i = -1;
3558 else if (arginfo[i].dt == vect_constant_def
3559 || arginfo[i].dt == vect_external_def
3560 || arginfo[i].linear_step)
3561 this_badness += 64;
3562 break;
3563 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3564 if (arginfo[i].dt != vect_constant_def
3565 && arginfo[i].dt != vect_external_def)
3566 i = -1;
3567 break;
3568 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3569 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3570 if (arginfo[i].dt == vect_constant_def
3571 || arginfo[i].dt == vect_external_def
3572 || (arginfo[i].linear_step
3573 != n->simdclone->args[i].linear_step))
3574 i = -1;
3575 break;
3576 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3577 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3578 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3579 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3580 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3581 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3582 /* FORNOW */
3583 i = -1;
3584 break;
3585 case SIMD_CLONE_ARG_TYPE_MASK:
3586 gcc_unreachable ();
3588 if (i == (size_t) -1)
3589 break;
3590 if (n->simdclone->args[i].alignment > arginfo[i].align)
3592 i = -1;
3593 break;
3595 if (arginfo[i].align)
3596 this_badness += (exact_log2 (arginfo[i].align)
3597 - exact_log2 (n->simdclone->args[i].alignment));
3599 if (i == (size_t) -1)
3600 continue;
3601 if (bestn == NULL || this_badness < badness)
3603 bestn = n;
3604 badness = this_badness;
3608 if (bestn == NULL)
3609 return false;
3611 for (i = 0; i < nargs; i++)
3612 if ((arginfo[i].dt == vect_constant_def
3613 || arginfo[i].dt == vect_external_def)
3614 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3616 arginfo[i].vectype
3617 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3618 i)));
3619 if (arginfo[i].vectype == NULL
3620 || (simd_clone_subparts (arginfo[i].vectype)
3621 > bestn->simdclone->simdlen))
3622 return false;
3625 fndecl = bestn->decl;
3626 nunits = bestn->simdclone->simdlen;
3627 ncopies = vf / nunits;
3629 /* If the function isn't const, only allow it in simd loops where the
3630 user has asserted that at least nunits consecutive iterations can be
3631 performed using SIMD instructions. */
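/* For example, a loop annotated with "#pragma omp simd safelen(8)" has
   loop->safelen == 8, so a clone with simdlen <= 8 can still be used
   even when the call reads or writes memory. */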
3632 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3633 && gimple_vuse (stmt))
3634 return false;
3636 /* Sanity check: make sure that at least one copy of the vectorized stmt
3637 needs to be generated. */
3638 gcc_assert (ncopies >= 1);
3640 if (!vec_stmt) /* transformation not required. */
3642 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3643 for (i = 0; i < nargs; i++)
3644 if ((bestn->simdclone->args[i].arg_type
3645 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3646 || (bestn->simdclone->args[i].arg_type
3647 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3649 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3650 + 1);
3651 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3652 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3653 ? size_type_node : TREE_TYPE (arginfo[i].op);
3654 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3655 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3656 tree sll = arginfo[i].simd_lane_linear
3657 ? boolean_true_node : boolean_false_node;
3658 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3660 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3661 if (dump_enabled_p ())
3662 dump_printf_loc (MSG_NOTE, vect_location,
3663 "=== vectorizable_simd_clone_call ===\n");
3664 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3665 return true;
3668 /* Transform. */
3670 if (dump_enabled_p ())
3671 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3673 /* Handle def. */
3674 scalar_dest = gimple_call_lhs (stmt);
3675 vec_dest = NULL_TREE;
3676 rtype = NULL_TREE;
3677 ratype = NULL_TREE;
3678 if (scalar_dest)
3680 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3681 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3682 if (TREE_CODE (rtype) == ARRAY_TYPE)
3684 ratype = rtype;
3685 rtype = TREE_TYPE (ratype);
3689 prev_stmt_info = NULL;
3690 for (j = 0; j < ncopies; ++j)
3692 /* Build argument list for the vectorized call. */
3693 if (j == 0)
3694 vargs.create (nargs);
3695 else
3696 vargs.truncate (0);
3698 for (i = 0; i < nargs; i++)
3700 unsigned int k, l, m, o;
3701 tree atype;
3702 op = gimple_call_arg (stmt, i);
3703 switch (bestn->simdclone->args[i].arg_type)
3705 case SIMD_CLONE_ARG_TYPE_VECTOR:
3706 atype = bestn->simdclone->args[i].vector_type;
3707 o = nunits / simd_clone_subparts (atype);
3708 for (m = j * o; m < (j + 1) * o; m++)
3710 if (simd_clone_subparts (atype)
3711 < simd_clone_subparts (arginfo[i].vectype))
3713 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3714 k = (simd_clone_subparts (arginfo[i].vectype)
3715 / simd_clone_subparts (atype));
3716 gcc_assert ((k & (k - 1)) == 0);
3717 if (m == 0)
3718 vec_oprnd0
3719 = vect_get_vec_def_for_operand (op, stmt);
3720 else
3722 vec_oprnd0 = arginfo[i].op;
3723 if ((m & (k - 1)) == 0)
3724 vec_oprnd0
3725 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3726 vec_oprnd0);
3728 arginfo[i].op = vec_oprnd0;
3729 vec_oprnd0
3730 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3731 bitsize_int (prec),
3732 bitsize_int ((m & (k - 1)) * prec));
3733 new_stmt
3734 = gimple_build_assign (make_ssa_name (atype),
3735 vec_oprnd0);
3736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3737 vargs.safe_push (gimple_assign_lhs (new_stmt));
3739 else
3741 k = (simd_clone_subparts (atype)
3742 / simd_clone_subparts (arginfo[i].vectype));
3743 gcc_assert ((k & (k - 1)) == 0);
3744 vec<constructor_elt, va_gc> *ctor_elts;
3745 if (k != 1)
3746 vec_alloc (ctor_elts, k);
3747 else
3748 ctor_elts = NULL;
3749 for (l = 0; l < k; l++)
3751 if (m == 0 && l == 0)
3752 vec_oprnd0
3753 = vect_get_vec_def_for_operand (op, stmt);
3754 else
3755 vec_oprnd0
3756 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3757 arginfo[i].op);
3758 arginfo[i].op = vec_oprnd0;
3759 if (k == 1)
3760 break;
3761 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3762 vec_oprnd0);
3764 if (k == 1)
3765 vargs.safe_push (vec_oprnd0);
3766 else
3768 vec_oprnd0 = build_constructor (atype, ctor_elts);
3769 new_stmt
3770 = gimple_build_assign (make_ssa_name (atype),
3771 vec_oprnd0);
3772 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3773 vargs.safe_push (gimple_assign_lhs (new_stmt));
3777 break;
3778 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3779 vargs.safe_push (op);
3780 break;
3781 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3782 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3783 if (j == 0)
3785 gimple_seq stmts;
3786 arginfo[i].op
3787 = force_gimple_operand (arginfo[i].op, &stmts, true,
3788 NULL_TREE);
3789 if (stmts != NULL)
3791 basic_block new_bb;
3792 edge pe = loop_preheader_edge (loop);
3793 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3794 gcc_assert (!new_bb);
3796 if (arginfo[i].simd_lane_linear)
3798 vargs.safe_push (arginfo[i].op);
3799 break;
3801 tree phi_res = copy_ssa_name (op);
3802 gphi *new_phi = create_phi_node (phi_res, loop->header);
3803 set_vinfo_for_stmt (new_phi,
3804 new_stmt_vec_info (new_phi, loop_vinfo));
3805 add_phi_arg (new_phi, arginfo[i].op,
3806 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3807 enum tree_code code
3808 = POINTER_TYPE_P (TREE_TYPE (op))
3809 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3810 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3811 ? sizetype : TREE_TYPE (op);
3812 widest_int cst
3813 = wi::mul (bestn->simdclone->args[i].linear_step,
3814 ncopies * nunits);
3815 tree tcst = wide_int_to_tree (type, cst);
3816 tree phi_arg = copy_ssa_name (op);
3817 new_stmt
3818 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3819 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3820 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3821 set_vinfo_for_stmt (new_stmt,
3822 new_stmt_vec_info (new_stmt, loop_vinfo));
3823 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3824 UNKNOWN_LOCATION);
3825 arginfo[i].op = phi_res;
3826 vargs.safe_push (phi_res);
3828 else
3830 enum tree_code code
3831 = POINTER_TYPE_P (TREE_TYPE (op))
3832 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3833 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3834 ? sizetype : TREE_TYPE (op);
3835 widest_int cst
3836 = wi::mul (bestn->simdclone->args[i].linear_step,
3837 j * nunits);
3838 tree tcst = wide_int_to_tree (type, cst);
3839 new_temp = make_ssa_name (TREE_TYPE (op));
3840 new_stmt = gimple_build_assign (new_temp, code,
3841 arginfo[i].op, tcst);
3842 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3843 vargs.safe_push (new_temp);
3845 break;
3846 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3847 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3848 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3849 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3850 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3851 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3852 default:
3853 gcc_unreachable ();
3857 new_stmt = gimple_build_call_vec (fndecl, vargs);
3858 if (vec_dest)
3860 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
3861 if (ratype)
3862 new_temp = create_tmp_var (ratype);
3863 else if (simd_clone_subparts (vectype)
3864 == simd_clone_subparts (rtype))
3865 new_temp = make_ssa_name (vec_dest, new_stmt);
3866 else
3867 new_temp = make_ssa_name (rtype, new_stmt);
3868 gimple_call_set_lhs (new_stmt, new_temp);
3870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3872 if (vec_dest)
3874 if (simd_clone_subparts (vectype) < nunits)
3876 unsigned int k, l;
3877 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3878 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
3879 k = nunits / simd_clone_subparts (vectype);
3880 gcc_assert ((k & (k - 1)) == 0);
3881 for (l = 0; l < k; l++)
3883 tree t;
3884 if (ratype)
3886 t = build_fold_addr_expr (new_temp);
3887 t = build2 (MEM_REF, vectype, t,
3888 build_int_cst (TREE_TYPE (t), l * bytes));
3890 else
3891 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3892 bitsize_int (prec), bitsize_int (l * prec));
3893 new_stmt
3894 = gimple_build_assign (make_ssa_name (vectype), t);
3895 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3896 if (j == 0 && l == 0)
3897 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3898 else
3899 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3901 prev_stmt_info = vinfo_for_stmt (new_stmt);
3904 if (ratype)
3906 tree clobber = build_constructor (ratype, NULL);
3907 TREE_THIS_VOLATILE (clobber) = 1;
3908 new_stmt = gimple_build_assign (new_temp, clobber);
3909 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3911 continue;
3913 else if (simd_clone_subparts (vectype) > nunits)
3915 unsigned int k = (simd_clone_subparts (vectype)
3916 / simd_clone_subparts (rtype));
3917 gcc_assert ((k & (k - 1)) == 0);
3918 if ((j & (k - 1)) == 0)
3919 vec_alloc (ret_ctor_elts, k);
3920 if (ratype)
3922 unsigned int m, o = nunits / simd_clone_subparts (rtype);
3923 for (m = 0; m < o; m++)
3925 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3926 size_int (m), NULL_TREE, NULL_TREE);
3927 new_stmt
3928 = gimple_build_assign (make_ssa_name (rtype), tem);
3929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3930 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3931 gimple_assign_lhs (new_stmt));
3933 tree clobber = build_constructor (ratype, NULL);
3934 TREE_THIS_VOLATILE (clobber) = 1;
3935 new_stmt = gimple_build_assign (new_temp, clobber);
3936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3938 else
3939 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3940 if ((j & (k - 1)) != k - 1)
3941 continue;
3942 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3943 new_stmt
3944 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3945 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3947 if ((unsigned) j == k - 1)
3948 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3949 else
3950 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3952 prev_stmt_info = vinfo_for_stmt (new_stmt);
3953 continue;
3955 else if (ratype)
3957 tree t = build_fold_addr_expr (new_temp);
3958 t = build2 (MEM_REF, vectype, t,
3959 build_int_cst (TREE_TYPE (t), 0));
3960 new_stmt
3961 = gimple_build_assign (make_ssa_name (vec_dest), t);
3962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3963 tree clobber = build_constructor (ratype, NULL);
3964 TREE_THIS_VOLATILE (clobber) = 1;
3965 vect_finish_stmt_generation (stmt,
3966 gimple_build_assign (new_temp,
3967 clobber), gsi);
3971 if (j == 0)
3972 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3973 else
3974 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3976 prev_stmt_info = vinfo_for_stmt (new_stmt);
3979 vargs.release ();
3981 /* The call in STMT might prevent it from being removed in dce.
3982 However, we cannot remove it here, due to the way the ssa name
3983 it defines is mapped to the new definition. So just replace the
3984 rhs of the statement with something harmless. */
3986 if (slp_node)
3987 return true;
3989 if (scalar_dest)
3991 type = TREE_TYPE (scalar_dest);
3992 if (is_pattern_stmt_p (stmt_info))
3993 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3994 else
3995 lhs = gimple_call_lhs (stmt);
3996 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3998 else
3999 new_stmt = gimple_build_nop ();
4000 set_vinfo_for_stmt (new_stmt, stmt_info);
4001 set_vinfo_for_stmt (stmt, NULL);
4002 STMT_VINFO_STMT (stmt_info) = new_stmt;
4003 gsi_replace (gsi, new_stmt, true);
4004 unlink_stmt_vdef (stmt);
4006 return true;
4010 /* Function vect_gen_widened_results_half
4012 Create a vector stmt whose code is CODE, whose number of arguments is
4013 OP_TYPE and whose result variable is VEC_DEST; its arguments are
4014 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4015 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4016 needs to be created (DECL is a function-decl of a target-builtin).
4017 STMT is the original scalar stmt that we are vectorizing. */
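/* For instance, widening a multiplication of V8HI operands to V4SI
   results is typically done with the VEC_WIDEN_MULT_LO_EXPR and
   VEC_WIDEN_MULT_HI_EXPR codes chosen by supportable_widening_operation,
   each call to this function producing one half of the widened
   elements. */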
4019 static gimple *
4020 vect_gen_widened_results_half (enum tree_code code,
4021 tree decl,
4022 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4023 tree vec_dest, gimple_stmt_iterator *gsi,
4024 gimple *stmt)
4026 gimple *new_stmt;
4027 tree new_temp;
4029 /* Generate half of the widened result: */
4030 if (code == CALL_EXPR)
4032 /* Target specific support */
4033 if (op_type == binary_op)
4034 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4035 else
4036 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4037 new_temp = make_ssa_name (vec_dest, new_stmt);
4038 gimple_call_set_lhs (new_stmt, new_temp);
4040 else
4042 /* Generic support */
4043 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4044 if (op_type != binary_op)
4045 vec_oprnd1 = NULL;
4046 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4047 new_temp = make_ssa_name (vec_dest, new_stmt);
4048 gimple_assign_set_lhs (new_stmt, new_temp);
4050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4052 return new_stmt;
4056 /* Get vectorized definitions for loop-based vectorization. For the first
4057 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4058 the scalar operand), and for the rest we get a copy with
4059 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4060 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4061 The vectors are collected into VEC_OPRNDS. */
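/* Note that every invocation pushes exactly two vector defs and recurses
   while MULTI_STEP_CVT is nonzero, so a call with MULTI_STEP_CVT == N
   collects 2 * (N + 1) vectors; the NARROW case below passes
   vect_pow2 (multi_step_cvt) - 1 so that repeated pairwise packing
   reduces them to a single vector per copy. */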
4063 static void
4064 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4065 vec<tree> *vec_oprnds, int multi_step_cvt)
4067 tree vec_oprnd;
4069 /* Get first vector operand. */
4070 /* All the vector operands except the very first one (that is scalar oprnd)
4071 are stmt copies. */
4072 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4073 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4074 else
4075 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4077 vec_oprnds->quick_push (vec_oprnd);
4079 /* Get second vector operand. */
4080 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4081 vec_oprnds->quick_push (vec_oprnd);
4083 *oprnd = vec_oprnd;
4085 /* For conversion in multiple steps, continue to get operands
4086 recursively. */
4087 if (multi_step_cvt)
4088 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4092 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4093 For multi-step conversions store the resulting vectors and call the function
4094 recursively. */
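/* Each level combines pairs of input vectors with one demotion stmt,
   halving the number of live vectors; the deeper recursion levels are
   emitted with VEC_PACK_TRUNC_EXPR (see the recursive call below). */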
4096 static void
4097 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4098 int multi_step_cvt, gimple *stmt,
4099 vec<tree> vec_dsts,
4100 gimple_stmt_iterator *gsi,
4101 slp_tree slp_node, enum tree_code code,
4102 stmt_vec_info *prev_stmt_info)
4104 unsigned int i;
4105 tree vop0, vop1, new_tmp, vec_dest;
4106 gimple *new_stmt;
4107 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4109 vec_dest = vec_dsts.pop ();
4111 for (i = 0; i < vec_oprnds->length (); i += 2)
4113 /* Create demotion operation. */
4114 vop0 = (*vec_oprnds)[i];
4115 vop1 = (*vec_oprnds)[i + 1];
4116 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4117 new_tmp = make_ssa_name (vec_dest, new_stmt);
4118 gimple_assign_set_lhs (new_stmt, new_tmp);
4119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4121 if (multi_step_cvt)
4122 /* Store the resulting vector for next recursive call. */
4123 (*vec_oprnds)[i/2] = new_tmp;
4124 else
4126 /* This is the last step of the conversion sequence. Store the
4127 vectors in SLP_NODE or in the vector info of the scalar statement
4128 (or in the STMT_VINFO_RELATED_STMT chain). */
4129 if (slp_node)
4130 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4131 else
4133 if (!*prev_stmt_info)
4134 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4135 else
4136 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4138 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4143 /* For multi-step demotion operations we first generate demotion operations
4144 from the source type to the intermediate types, and then combine the
4145 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4146 type. */
4147 if (multi_step_cvt)
4149 /* At each level of recursion we have half of the operands we had at the
4150 previous level. */
4151 vec_oprnds->truncate ((i+1)/2);
4152 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4153 stmt, vec_dsts, gsi, slp_node,
4154 VEC_PACK_TRUNC_EXPR,
4155 prev_stmt_info);
4158 vec_dsts.quick_push (vec_dest);
4162 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4163 and VEC_OPRNDS1 (for binary operations). For multi-step conversions the
4164 resulting vectors are stored back in VEC_OPRNDS0 for the next step. */
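/* Every input vector in VEC_OPRNDS0 yields two result vectors (the low
   and high halves of the widened elements), so the vector of operands
   doubles in length; the WIDEN case of vectorizable_conversion invokes
   this function once per promotion step. */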
4166 static void
4167 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4168 vec<tree> *vec_oprnds1,
4169 gimple *stmt, tree vec_dest,
4170 gimple_stmt_iterator *gsi,
4171 enum tree_code code1,
4172 enum tree_code code2, tree decl1,
4173 tree decl2, int op_type)
4175 int i;
4176 tree vop0, vop1, new_tmp1, new_tmp2;
4177 gimple *new_stmt1, *new_stmt2;
4178 vec<tree> vec_tmp = vNULL;
4180 vec_tmp.create (vec_oprnds0->length () * 2);
4181 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4183 if (op_type == binary_op)
4184 vop1 = (*vec_oprnds1)[i];
4185 else
4186 vop1 = NULL_TREE;
4188 /* Generate the two halves of promotion operation. */
4189 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4190 op_type, vec_dest, gsi, stmt);
4191 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4192 op_type, vec_dest, gsi, stmt);
4193 if (is_gimple_call (new_stmt1))
4195 new_tmp1 = gimple_call_lhs (new_stmt1);
4196 new_tmp2 = gimple_call_lhs (new_stmt2);
4198 else
4200 new_tmp1 = gimple_assign_lhs (new_stmt1);
4201 new_tmp2 = gimple_assign_lhs (new_stmt2);
4204 /* Store the results for the next step. */
4205 vec_tmp.quick_push (new_tmp1);
4206 vec_tmp.quick_push (new_tmp2);
4209 vec_oprnds0->release ();
4210 *vec_oprnds0 = vec_tmp;
4214 /* Check if STMT performs a conversion operation that can be vectorized.
4215 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4216 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4217 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
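/* The conversion is classified below as NONE, WIDEN or NARROW by
   comparing the number of elements in the input and output vector types.
   E.g. with 128-bit vectors, int -> short goes from V4SI to V8HI and is
   NARROW, short -> int is WIDEN, and int <-> float of the same width
   is NONE. */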
4219 static bool
4220 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4221 gimple **vec_stmt, slp_tree slp_node)
4223 tree vec_dest;
4224 tree scalar_dest;
4225 tree op0, op1 = NULL_TREE;
4226 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4229 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4230 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4231 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4232 tree new_temp;
4233 gimple *def_stmt;
4234 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4235 int ndts = 2;
4236 gimple *new_stmt = NULL;
4237 stmt_vec_info prev_stmt_info;
4238 poly_uint64 nunits_in;
4239 poly_uint64 nunits_out;
4240 tree vectype_out, vectype_in;
4241 int ncopies, i, j;
4242 tree lhs_type, rhs_type;
4243 enum { NARROW, NONE, WIDEN } modifier;
4244 vec<tree> vec_oprnds0 = vNULL;
4245 vec<tree> vec_oprnds1 = vNULL;
4246 tree vop0;
4247 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4248 vec_info *vinfo = stmt_info->vinfo;
4249 int multi_step_cvt = 0;
4250 vec<tree> interm_types = vNULL;
4251 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4252 int op_type;
4253 unsigned short fltsz;
4255 /* Is STMT a vectorizable conversion? */
4257 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4258 return false;
4260 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4261 && ! vec_stmt)
4262 return false;
4264 if (!is_gimple_assign (stmt))
4265 return false;
4267 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4268 return false;
4270 code = gimple_assign_rhs_code (stmt);
4271 if (!CONVERT_EXPR_CODE_P (code)
4272 && code != FIX_TRUNC_EXPR
4273 && code != FLOAT_EXPR
4274 && code != WIDEN_MULT_EXPR
4275 && code != WIDEN_LSHIFT_EXPR)
4276 return false;
4278 op_type = TREE_CODE_LENGTH (code);
4280 /* Check types of lhs and rhs. */
4281 scalar_dest = gimple_assign_lhs (stmt);
4282 lhs_type = TREE_TYPE (scalar_dest);
4283 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4285 op0 = gimple_assign_rhs1 (stmt);
4286 rhs_type = TREE_TYPE (op0);
4288 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4289 && !((INTEGRAL_TYPE_P (lhs_type)
4290 && INTEGRAL_TYPE_P (rhs_type))
4291 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4292 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4293 return false;
4295 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4296 && ((INTEGRAL_TYPE_P (lhs_type)
4297 && !type_has_mode_precision_p (lhs_type))
4298 || (INTEGRAL_TYPE_P (rhs_type)
4299 && !type_has_mode_precision_p (rhs_type))))
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4303 "type conversion to/from bit-precision unsupported."
4304 "\n");
4305 return false;
4308 /* Check the operands of the operation. */
4309 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4311 if (dump_enabled_p ())
4312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4313 "use not simple.\n");
4314 return false;
4316 if (op_type == binary_op)
4318 bool ok;
4320 op1 = gimple_assign_rhs2 (stmt);
4321 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4322 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4323 OP1. */
4324 if (CONSTANT_CLASS_P (op0))
4325 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4326 else
4327 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4329 if (!ok)
4331 if (dump_enabled_p ())
4332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4333 "use not simple.\n");
4334 return false;
4338 /* If op0 is an external or constant def, use a vector type of
4339 the same size as the output vector type. */
4340 if (!vectype_in)
4341 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4342 if (vec_stmt)
4343 gcc_assert (vectype_in);
4344 if (!vectype_in)
4346 if (dump_enabled_p ())
4348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4349 "no vectype for scalar type ");
4350 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4351 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4354 return false;
4357 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4358 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4360 if (dump_enabled_p ())
4362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4363 "can't convert between boolean and non "
4364 "boolean vectors");
4365 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4366 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4369 return false;
4372 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4373 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4374 if (known_eq (nunits_out, nunits_in))
4375 modifier = NONE;
4376 else if (multiple_p (nunits_out, nunits_in))
4377 modifier = NARROW;
4378 else
4380 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4381 modifier = WIDEN;
4384 /* Multiple types in SLP are handled by creating the appropriate number of
4385 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4386 case of SLP. */
4387 if (slp_node)
4388 ncopies = 1;
4389 else if (modifier == NARROW)
4390 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4391 else
4392 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4394 /* Sanity check: make sure that at least one copy of the vectorized stmt
4395 needs to be generated. */
4396 gcc_assert (ncopies >= 1);
4398 bool found_mode = false;
4399 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4400 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4401 opt_scalar_mode rhs_mode_iter;
4403 /* Supportable by target? */
4404 switch (modifier)
4406 case NONE:
4407 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4408 return false;
4409 if (supportable_convert_operation (code, vectype_out, vectype_in,
4410 &decl1, &code1))
4411 break;
4412 /* FALLTHRU */
4413 unsupported:
4414 if (dump_enabled_p ())
4415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4416 "conversion not supported by target.\n");
4417 return false;
4419 case WIDEN:
4420 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4421 &code1, &code2, &multi_step_cvt,
4422 &interm_types))
4424 /* Binary widening operation can only be supported directly by the
4425 architecture. */
4426 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4427 break;
4430 if (code != FLOAT_EXPR
4431 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4432 goto unsupported;
4434 fltsz = GET_MODE_SIZE (lhs_mode);
4435 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4437 rhs_mode = rhs_mode_iter.require ();
4438 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4439 break;
4441 cvt_type
4442 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4443 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4444 if (cvt_type == NULL_TREE)
4445 goto unsupported;
4447 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4449 if (!supportable_convert_operation (code, vectype_out,
4450 cvt_type, &decl1, &codecvt1))
4451 goto unsupported;
4453 else if (!supportable_widening_operation (code, stmt, vectype_out,
4454 cvt_type, &codecvt1,
4455 &codecvt2, &multi_step_cvt,
4456 &interm_types))
4457 continue;
4458 else
4459 gcc_assert (multi_step_cvt == 0);
4461 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4462 vectype_in, &code1, &code2,
4463 &multi_step_cvt, &interm_types))
4465 found_mode = true;
4466 break;
4470 if (!found_mode)
4471 goto unsupported;
4473 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4474 codecvt2 = ERROR_MARK;
4475 else
4477 multi_step_cvt++;
4478 interm_types.safe_push (cvt_type);
4479 cvt_type = NULL_TREE;
4481 break;
4483 case NARROW:
4484 gcc_assert (op_type == unary_op);
4485 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4486 &code1, &multi_step_cvt,
4487 &interm_types))
4488 break;
4490 if (code != FIX_TRUNC_EXPR
4491 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4492 goto unsupported;
4494 cvt_type
4495 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4496 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4497 if (cvt_type == NULL_TREE)
4498 goto unsupported;
4499 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4500 &decl1, &codecvt1))
4501 goto unsupported;
4502 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4503 &code1, &multi_step_cvt,
4504 &interm_types))
4505 break;
4506 goto unsupported;
4508 default:
4509 gcc_unreachable ();
4512 if (!vec_stmt) /* transformation not required. */
4514 if (dump_enabled_p ())
4515 dump_printf_loc (MSG_NOTE, vect_location,
4516 "=== vectorizable_conversion ===\n");
4517 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4519 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4520 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4522 else if (modifier == NARROW)
4524 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4525 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4527 else
4529 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4530 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4532 interm_types.release ();
4533 return true;
4536 /* Transform. */
4537 if (dump_enabled_p ())
4538 dump_printf_loc (MSG_NOTE, vect_location,
4539 "transform conversion. ncopies = %d.\n", ncopies);
4541 if (op_type == binary_op)
4543 if (CONSTANT_CLASS_P (op0))
4544 op0 = fold_convert (TREE_TYPE (op1), op0);
4545 else if (CONSTANT_CLASS_P (op1))
4546 op1 = fold_convert (TREE_TYPE (op0), op1);
4549 /* In case of multi-step conversion, we first generate conversion operations
4550 to the intermediate types, and then from those types to the final one.
4551 We create vector destinations for the intermediate type (TYPES) received
4552 from supportable_*_operation, and store them in the correct order
4553 for future use in vect_create_vectorized_*_stmts (). */
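/* E.g. a two-step widening from V16QI to V4SI creates destination
   variables for the intermediate V8HI type as well as for the final
   V4SI type. */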
4554 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4555 vec_dest = vect_create_destination_var (scalar_dest,
4556 (cvt_type && modifier == WIDEN)
4557 ? cvt_type : vectype_out);
4558 vec_dsts.quick_push (vec_dest);
4560 if (multi_step_cvt)
4562 for (i = interm_types.length () - 1;
4563 interm_types.iterate (i, &intermediate_type); i--)
4565 vec_dest = vect_create_destination_var (scalar_dest,
4566 intermediate_type);
4567 vec_dsts.quick_push (vec_dest);
4571 if (cvt_type)
4572 vec_dest = vect_create_destination_var (scalar_dest,
4573 modifier == WIDEN
4574 ? vectype_out : cvt_type);
4576 if (!slp_node)
4578 if (modifier == WIDEN)
4580 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4581 if (op_type == binary_op)
4582 vec_oprnds1.create (1);
4584 else if (modifier == NARROW)
4585 vec_oprnds0.create (
4586 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4588 else if (code == WIDEN_LSHIFT_EXPR)
4589 vec_oprnds1.create (slp_node->vec_stmts_size);
4591 last_oprnd = op0;
4592 prev_stmt_info = NULL;
4593 switch (modifier)
4595 case NONE:
4596 for (j = 0; j < ncopies; j++)
4598 if (j == 0)
4599 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4600 else
4601 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4603 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4605 /* Arguments are ready, create the new vector stmt. */
4606 if (code1 == CALL_EXPR)
4608 new_stmt = gimple_build_call (decl1, 1, vop0);
4609 new_temp = make_ssa_name (vec_dest, new_stmt);
4610 gimple_call_set_lhs (new_stmt, new_temp);
4612 else
4614 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4615 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4616 new_temp = make_ssa_name (vec_dest, new_stmt);
4617 gimple_assign_set_lhs (new_stmt, new_temp);
4620 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4621 if (slp_node)
4622 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4623 else
4625 if (!prev_stmt_info)
4626 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4627 else
4628 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4629 prev_stmt_info = vinfo_for_stmt (new_stmt);
4633 break;
4635 case WIDEN:
4636 /* In case the vectorization factor (VF) is bigger than the number
4637 of elements that we can fit in a vectype (nunits), we have to
4638 generate more than one vector stmt, i.e. we need to "unroll"
4639 the vector stmt by a factor VF/nunits. */
4640 for (j = 0; j < ncopies; j++)
4642 /* Handle uses. */
4643 if (j == 0)
4645 if (slp_node)
4647 if (code == WIDEN_LSHIFT_EXPR)
4649 unsigned int k;
4651 vec_oprnd1 = op1;
4652 /* Store vec_oprnd1 for every vector stmt to be created
4653 for SLP_NODE. We check during the analysis that all
4654 the shift arguments are the same. */
4655 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4656 vec_oprnds1.quick_push (vec_oprnd1);
4658 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4659 slp_node);
4661 else
4662 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4663 &vec_oprnds1, slp_node);
4665 else
4667 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4668 vec_oprnds0.quick_push (vec_oprnd0);
4669 if (op_type == binary_op)
4671 if (code == WIDEN_LSHIFT_EXPR)
4672 vec_oprnd1 = op1;
4673 else
4674 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4675 vec_oprnds1.quick_push (vec_oprnd1);
4679 else
4681 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4682 vec_oprnds0.truncate (0);
4683 vec_oprnds0.quick_push (vec_oprnd0);
4684 if (op_type == binary_op)
4686 if (code == WIDEN_LSHIFT_EXPR)
4687 vec_oprnd1 = op1;
4688 else
4689 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4690 vec_oprnd1);
4691 vec_oprnds1.truncate (0);
4692 vec_oprnds1.quick_push (vec_oprnd1);
4696 /* Arguments are ready. Create the new vector stmts. */
4697 for (i = multi_step_cvt; i >= 0; i--)
4699 tree this_dest = vec_dsts[i];
4700 enum tree_code c1 = code1, c2 = code2;
4701 if (i == 0 && codecvt2 != ERROR_MARK)
4703 c1 = codecvt1;
4704 c2 = codecvt2;
4706 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4707 &vec_oprnds1,
4708 stmt, this_dest, gsi,
4709 c1, c2, decl1, decl2,
4710 op_type);
4713 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4715 if (cvt_type)
4717 if (codecvt1 == CALL_EXPR)
4719 new_stmt = gimple_build_call (decl1, 1, vop0);
4720 new_temp = make_ssa_name (vec_dest, new_stmt);
4721 gimple_call_set_lhs (new_stmt, new_temp);
4723 else
4725 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4726 new_temp = make_ssa_name (vec_dest);
4727 new_stmt = gimple_build_assign (new_temp, codecvt1,
4728 vop0);
4731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4733 else
4734 new_stmt = SSA_NAME_DEF_STMT (vop0);
4736 if (slp_node)
4737 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4738 else
4740 if (!prev_stmt_info)
4741 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4742 else
4743 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4744 prev_stmt_info = vinfo_for_stmt (new_stmt);
4749 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4750 break;
4752 case NARROW:
4753 /* In case the vectorization factor (VF) is bigger than the number
4754 of elements that we can fit in a vectype (nunits), we have to
4755 generate more than one vector stmt, i.e. we need to "unroll"
4756 the vector stmt by a factor VF/nunits. */
4757 for (j = 0; j < ncopies; j++)
4759 /* Handle uses. */
4760 if (slp_node)
4761 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4762 slp_node);
4763 else
4765 vec_oprnds0.truncate (0);
4766 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4767 vect_pow2 (multi_step_cvt) - 1);
4770 /* Arguments are ready. Create the new vector stmts. */
4771 if (cvt_type)
4772 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4774 if (codecvt1 == CALL_EXPR)
4776 new_stmt = gimple_build_call (decl1, 1, vop0);
4777 new_temp = make_ssa_name (vec_dest, new_stmt);
4778 gimple_call_set_lhs (new_stmt, new_temp);
4780 else
4782 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4783 new_temp = make_ssa_name (vec_dest);
4784 new_stmt = gimple_build_assign (new_temp, codecvt1,
4785 vop0);
4788 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4789 vec_oprnds0[i] = new_temp;
4792 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4793 stmt, vec_dsts, gsi,
4794 slp_node, code1,
4795 &prev_stmt_info);
4798 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4799 break;
4802 vec_oprnds0.release ();
4803 vec_oprnds1.release ();
4804 interm_types.release ();
4806 return true;
4810 /* Function vectorizable_assignment.
4812 Check if STMT performs an assignment (copy) that can be vectorized.
4813 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4814 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4815 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
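/* This covers plain copies, PAREN_EXPRs, and conversions that change
   neither the number of vector elements nor the vector size, e.g. a
   NOP_EXPR between int and unsigned int or a VIEW_CONVERT_EXPR between
   V4SI and V4SF; the conversions are emitted as a VIEW_CONVERT_EXPR of
   the vector operand, while plain copies are emitted as-is. */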
4817 static bool
4818 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4819 gimple **vec_stmt, slp_tree slp_node)
4821 tree vec_dest;
4822 tree scalar_dest;
4823 tree op;
4824 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4825 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4826 tree new_temp;
4827 gimple *def_stmt;
4828 enum vect_def_type dt[1] = {vect_unknown_def_type};
4829 int ndts = 1;
4830 int ncopies;
4831 int i, j;
4832 vec<tree> vec_oprnds = vNULL;
4833 tree vop;
4834 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4835 vec_info *vinfo = stmt_info->vinfo;
4836 gimple *new_stmt = NULL;
4837 stmt_vec_info prev_stmt_info = NULL;
4838 enum tree_code code;
4839 tree vectype_in;
4841 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4842 return false;
4844 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4845 && ! vec_stmt)
4846 return false;
4848 /* Is vectorizable assignment? */
4849 if (!is_gimple_assign (stmt))
4850 return false;
4852 scalar_dest = gimple_assign_lhs (stmt);
4853 if (TREE_CODE (scalar_dest) != SSA_NAME)
4854 return false;
4856 code = gimple_assign_rhs_code (stmt);
4857 if (gimple_assign_single_p (stmt)
4858 || code == PAREN_EXPR
4859 || CONVERT_EXPR_CODE_P (code))
4860 op = gimple_assign_rhs1 (stmt);
4861 else
4862 return false;
4864 if (code == VIEW_CONVERT_EXPR)
4865 op = TREE_OPERAND (op, 0);
4867 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4868 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4870 /* Multiple types in SLP are handled by creating the appropriate number of
4871 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4872 case of SLP. */
4873 if (slp_node)
4874 ncopies = 1;
4875 else
4876 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4878 gcc_assert (ncopies >= 1);
4880 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4882 if (dump_enabled_p ())
4883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4884 "use not simple.\n");
4885 return false;
4888 /* We can handle NOP_EXPR conversions that do not change the number
4889 of elements or the vector size. */
4890 if ((CONVERT_EXPR_CODE_P (code)
4891 || code == VIEW_CONVERT_EXPR)
4892 && (!vectype_in
4893 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
4894 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4895 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4896 return false;
4898 /* We do not handle bit-precision changes. */
4899 if ((CONVERT_EXPR_CODE_P (code)
4900 || code == VIEW_CONVERT_EXPR)
4901 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4902 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4903 || !type_has_mode_precision_p (TREE_TYPE (op)))
4904 /* But a conversion that does not change the bit-pattern is ok. */
4905 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4906 > TYPE_PRECISION (TREE_TYPE (op)))
4907 && TYPE_UNSIGNED (TREE_TYPE (op)))
4908 /* Conversion between boolean types of different sizes is
4909 a simple assignment in case their vectypes are the same
4910 boolean vector type. */
4911 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4912 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4914 if (dump_enabled_p ())
4915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4916 "type conversion to/from bit-precision "
4917 "unsupported.\n");
4918 return false;
4921 if (!vec_stmt) /* transformation not required. */
4923 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4924 if (dump_enabled_p ())
4925 dump_printf_loc (MSG_NOTE, vect_location,
4926 "=== vectorizable_assignment ===\n");
4927 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4928 return true;
4931 /* Transform. */
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4935 /* Handle def. */
4936 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4938 /* Handle use. */
4939 for (j = 0; j < ncopies; j++)
4941 /* Handle uses. */
4942 if (j == 0)
4943 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4944 else
4945 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4947 /* Arguments are ready. create the new vector stmt. */
4948 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4950 if (CONVERT_EXPR_CODE_P (code)
4951 || code == VIEW_CONVERT_EXPR)
4952 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4953 new_stmt = gimple_build_assign (vec_dest, vop);
4954 new_temp = make_ssa_name (vec_dest, new_stmt);
4955 gimple_assign_set_lhs (new_stmt, new_temp);
4956 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4957 if (slp_node)
4958 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4961 if (slp_node)
4962 continue;
4964 if (j == 0)
4965 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4966 else
4967 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4969 prev_stmt_info = vinfo_for_stmt (new_stmt);
4972 vec_oprnds.release ();
4973 return true;
4977 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4978 either as shift by a scalar or by a vector. */
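/* The optab_scalar variant shifts all elements by one scalar amount,
   while the optab_vector variant shifts each element by the
   corresponding element of a shift-count vector; support for either
   form is enough for this predicate to return true. */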
4980 bool
4981 vect_supportable_shift (enum tree_code code, tree scalar_type)
4984 machine_mode vec_mode;
4985 optab optab;
4986 int icode;
4987 tree vectype;
4989 vectype = get_vectype_for_scalar_type (scalar_type);
4990 if (!vectype)
4991 return false;
4993 optab = optab_for_tree_code (code, vectype, optab_scalar);
4994 if (!optab
4995 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4997 optab = optab_for_tree_code (code, vectype, optab_vector);
4998 if (!optab
4999 || (optab_handler (optab, TYPE_MODE (vectype))
5000 == CODE_FOR_nothing))
5001 return false;
5004 vec_mode = TYPE_MODE (vectype);
5005 icode = (int) optab_handler (optab, vec_mode);
5006 if (icode == CODE_FOR_nothing)
5007 return false;
5009 return true;
5013 /* Function vectorizable_shift.
5015 Check if STMT performs a shift operation that can be vectorized.
5016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5018 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5020 static bool
5021 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5022 gimple **vec_stmt, slp_tree slp_node)
5024 tree vec_dest;
5025 tree scalar_dest;
5026 tree op0, op1 = NULL;
5027 tree vec_oprnd1 = NULL_TREE;
5028 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5029 tree vectype;
5030 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5031 enum tree_code code;
5032 machine_mode vec_mode;
5033 tree new_temp;
5034 optab optab;
5035 int icode;
5036 machine_mode optab_op2_mode;
5037 gimple *def_stmt;
5038 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5039 int ndts = 2;
5040 gimple *new_stmt = NULL;
5041 stmt_vec_info prev_stmt_info;
5042 poly_uint64 nunits_in;
5043 poly_uint64 nunits_out;
5044 tree vectype_out;
5045 tree op1_vectype;
5046 int ncopies;
5047 int j, i;
5048 vec<tree> vec_oprnds0 = vNULL;
5049 vec<tree> vec_oprnds1 = vNULL;
5050 tree vop0, vop1;
5051 unsigned int k;
5052 bool scalar_shift_arg = true;
5053 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5054 vec_info *vinfo = stmt_info->vinfo;
5056 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5057 return false;
5059 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5060 && ! vec_stmt)
5061 return false;
5063 /* Is STMT a vectorizable binary/unary operation? */
5064 if (!is_gimple_assign (stmt))
5065 return false;
5067 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5068 return false;
5070 code = gimple_assign_rhs_code (stmt);
5072 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5073 || code == RROTATE_EXPR))
5074 return false;
5076 scalar_dest = gimple_assign_lhs (stmt);
5077 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5078 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5080 if (dump_enabled_p ())
5081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5082 "bit-precision shifts not supported.\n");
5083 return false;
5086 op0 = gimple_assign_rhs1 (stmt);
5087 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5089 if (dump_enabled_p ())
5090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5091 "use not simple.\n");
5092 return false;
5094 /* If op0 is an external or constant def use a vector type with
5095 the same size as the output vector type. */
5096 if (!vectype)
5097 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5098 if (vec_stmt)
5099 gcc_assert (vectype);
5100 if (!vectype)
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5104 "no vectype for scalar type\n");
5105 return false;
5108 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5109 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5110 if (maybe_ne (nunits_out, nunits_in))
5111 return false;
5113 op1 = gimple_assign_rhs2 (stmt);
5114 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5118 "use not simple.\n");
5119 return false;
5122 /* Multiple types in SLP are handled by creating the appropriate number of
5123 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5124 case of SLP. */
5125 if (slp_node)
5126 ncopies = 1;
5127 else
5128 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5130 gcc_assert (ncopies >= 1);
5132 /* Determine whether the shift amount is a vector or a scalar. If the
5133 shift/rotate amount is a vector, use the vector/vector shift optabs. */
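/* E.g. "x[i] << 3", or "x[i] << n" with loop-invariant n, can use the
   vector/scalar form, whereas "x[i] << y[i]" needs the vector/vector
   form because each element may be shifted by a different amount. */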
5135 if ((dt[1] == vect_internal_def
5136 || dt[1] == vect_induction_def)
5137 && !slp_node)
5138 scalar_shift_arg = false;
5139 else if (dt[1] == vect_constant_def
5140 || dt[1] == vect_external_def
5141 || dt[1] == vect_internal_def)
5143 /* In SLP, we need to check whether the shift count is the same
5144 for all statements; in loops, if it is a constant or invariant,
5145 it is always a scalar shift. */
5146 if (slp_node)
5148 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5149 gimple *slpstmt;
5151 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5152 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5153 scalar_shift_arg = false;
5156 /* If the shift amount is computed by a pattern stmt we cannot
5157 use the scalar amount directly thus give up and use a vector
5158 shift. */
5159 if (dt[1] == vect_internal_def)
5161 gimple *def = SSA_NAME_DEF_STMT (op1);
5162 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5163 scalar_shift_arg = false;
5166 else
5168 if (dump_enabled_p ())
5169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5170 "operand mode requires invariant argument.\n");
5171 return false;
5174 /* Vector shifted by vector. */
5175 if (!scalar_shift_arg)
5177 optab = optab_for_tree_code (code, vectype, optab_vector);
5178 if (dump_enabled_p ())
5179 dump_printf_loc (MSG_NOTE, vect_location,
5180 "vector/vector shift/rotate found.\n");
5182 if (!op1_vectype)
5183 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5184 if (op1_vectype == NULL_TREE
5185 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5189 "unusable type for last operand in"
5190 " vector/vector shift/rotate.\n");
5191 return false;
5194 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5195 see if it has a vector-shifted-by-vector insn. */
5196 else
5198 optab = optab_for_tree_code (code, vectype, optab_scalar);
5199 if (optab
5200 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5202 if (dump_enabled_p ())
5203 dump_printf_loc (MSG_NOTE, vect_location,
5204 "vector/scalar shift/rotate found.\n");
5206 else
5208 optab = optab_for_tree_code (code, vectype, optab_vector);
5209 if (optab
5210 && (optab_handler (optab, TYPE_MODE (vectype))
5211 != CODE_FOR_nothing))
5213 scalar_shift_arg = false;
5215 if (dump_enabled_p ())
5216 dump_printf_loc (MSG_NOTE, vect_location,
5217 "vector/vector shift/rotate found.\n");
5219 /* Unlike the other binary operators, shifts/rotates have
5220 the rhs being int, instead of the same type as the lhs,
5221 so make sure the scalar is the right type if we are
5222 dealing with vectors of long long/long/short/char. */
5223 if (dt[1] == vect_constant_def)
5224 op1 = fold_convert (TREE_TYPE (vectype), op1);
5225 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5226 TREE_TYPE (op1)))
5228 if (slp_node
5229 && TYPE_MODE (TREE_TYPE (vectype))
5230 != TYPE_MODE (TREE_TYPE (op1)))
5232 if (dump_enabled_p ())
5233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5234 "unusable type for last operand in"
5235 " vector/vector shift/rotate.\n");
5236 return false;
5238 if (vec_stmt && !slp_node)
5240 op1 = fold_convert (TREE_TYPE (vectype), op1);
5241 op1 = vect_init_vector (stmt, op1,
5242 TREE_TYPE (vectype), NULL);
5249 /* Supportable by target? */
5250 if (!optab)
5252 if (dump_enabled_p ())
5253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5254 "no optab.\n");
5255 return false;
5257 vec_mode = TYPE_MODE (vectype);
5258 icode = (int) optab_handler (optab, vec_mode);
5259 if (icode == CODE_FOR_nothing)
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5263 "op not supported by target.\n");
5264 /* Check only during analysis. */
5265 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5266 || (!vec_stmt
5267 && !vect_worthwhile_without_simd_p (vinfo, code)))
5268 return false;
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_NOTE, vect_location,
5271 "proceeding using word mode.\n");
5274 /* Worthwhile without SIMD support? Check only during analysis. */
5275 if (!vec_stmt
5276 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5277 && !vect_worthwhile_without_simd_p (vinfo, code))
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5281 "not worthwhile without SIMD support.\n");
5282 return false;
5285 if (!vec_stmt) /* transformation not required. */
5287 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_NOTE, vect_location,
5290 "=== vectorizable_shift ===\n");
5291 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5292 return true;
5295 /* Transform. */
5297 if (dump_enabled_p ())
5298 dump_printf_loc (MSG_NOTE, vect_location,
5299 "transform binary/unary operation.\n");
5301 /* Handle def. */
5302 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5304 prev_stmt_info = NULL;
5305 for (j = 0; j < ncopies; j++)
5307 /* Handle uses. */
5308 if (j == 0)
5310 if (scalar_shift_arg)
5312 /* Vector shl and shr insn patterns can be defined with scalar
5313 operand 2 (shift operand). In this case, use constant or loop
5314 invariant op1 directly, without extending it to vector mode
5315 first. */
5316 optab_op2_mode = insn_data[icode].operand[2].mode;
5317 if (!VECTOR_MODE_P (optab_op2_mode))
5319 if (dump_enabled_p ())
5320 dump_printf_loc (MSG_NOTE, vect_location,
5321 "operand 1 using scalar mode.\n");
5322 vec_oprnd1 = op1;
5323 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5324 vec_oprnds1.quick_push (vec_oprnd1);
5325 if (slp_node)
5327 /* Store vec_oprnd1 for every vector stmt to be created
5328 for SLP_NODE. We check during the analysis that all
5329 the shift arguments are the same.
5330 TODO: Allow different constants for different vector
5331 stmts generated for an SLP instance. */
5332 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5333 vec_oprnds1.quick_push (vec_oprnd1);
5338 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5339 (a special case for certain kinds of vector shifts); otherwise,
5340 operand 1 should be of a vector type (the usual case). */
5341 if (vec_oprnd1)
5342 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5343 slp_node);
5344 else
5345 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5346 slp_node);
5348 else
5349 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5351 /* Arguments are ready. Create the new vector stmt. */
5352 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5354 vop1 = vec_oprnds1[i];
5355 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5356 new_temp = make_ssa_name (vec_dest, new_stmt);
5357 gimple_assign_set_lhs (new_stmt, new_temp);
5358 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5359 if (slp_node)
5360 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5363 if (slp_node)
5364 continue;
5366 if (j == 0)
5367 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5368 else
5369 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5370 prev_stmt_info = vinfo_for_stmt (new_stmt);
5373 vec_oprnds0.release ();
5374 vec_oprnds1.release ();
5376 return true;
5380 /* Function vectorizable_operation.
5382 Check if STMT performs a binary, unary or ternary operation that can
5383 be vectorized.
5384 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5385 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5386 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
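/* Shifts and rotates are rejected here and handled by vectorizable_shift
   instead; pointer arithmetic (POINTER_PLUS_EXPR, POINTER_DIFF_EXPR) is
   mapped to plain PLUS_EXPR/MINUS_EXPR on the vector elements, as done
   just below. */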
5388 static bool
5389 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5390 gimple **vec_stmt, slp_tree slp_node)
5392 tree vec_dest;
5393 tree scalar_dest;
5394 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5395 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5396 tree vectype;
5397 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5398 enum tree_code code, orig_code;
5399 machine_mode vec_mode;
5400 tree new_temp;
5401 int op_type;
5402 optab optab;
5403 bool target_support_p;
5404 gimple *def_stmt;
5405 enum vect_def_type dt[3]
5406 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5407 int ndts = 3;
5408 gimple *new_stmt = NULL;
5409 stmt_vec_info prev_stmt_info;
5410 poly_uint64 nunits_in;
5411 poly_uint64 nunits_out;
5412 tree vectype_out;
5413 int ncopies;
5414 int j, i;
5415 vec<tree> vec_oprnds0 = vNULL;
5416 vec<tree> vec_oprnds1 = vNULL;
5417 vec<tree> vec_oprnds2 = vNULL;
5418 tree vop0, vop1, vop2;
5419 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5420 vec_info *vinfo = stmt_info->vinfo;
5422 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5423 return false;
5425 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5426 && ! vec_stmt)
5427 return false;
5429 /* Is STMT a vectorizable binary/unary operation? */
5430 if (!is_gimple_assign (stmt))
5431 return false;
5433 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5434 return false;
5436 orig_code = code = gimple_assign_rhs_code (stmt);
5438 /* For pointer addition and subtraction, we should use the normal
5439 plus and minus for the vector operation. */
5440 if (code == POINTER_PLUS_EXPR)
5441 code = PLUS_EXPR;
5442 if (code == POINTER_DIFF_EXPR)
5443 code = MINUS_EXPR;
5445 /* Support only unary, binary or ternary operations. */
5446 op_type = TREE_CODE_LENGTH (code);
5447 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5451 "num. args = %d (not unary/binary/ternary op).\n",
5452 op_type);
5453 return false;
5456 scalar_dest = gimple_assign_lhs (stmt);
5457 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5459 /* Most operations cannot handle bit-precision types without extra
5460 truncations. */
5461 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5462 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5463 /* Exception are bitwise binary operations. */
5464 && code != BIT_IOR_EXPR
5465 && code != BIT_XOR_EXPR
5466 && code != BIT_AND_EXPR)
5468 if (dump_enabled_p ())
5469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5470 "bit-precision arithmetic not supported.\n");
5471 return false;
5474 op0 = gimple_assign_rhs1 (stmt);
5475 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5477 if (dump_enabled_p ())
5478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5479 "use not simple.\n");
5480 return false;
5482 /* If op0 is an external or constant def use a vector type with
5483 the same size as the output vector type. */
5484 if (!vectype)
5486 /* For a boolean type we cannot determine the vectype from an
5487 invariant value (we don't know whether it is a vector
5488 of booleans or a vector of integers). We use the output
5489 vectype because operations on booleans don't change the
5490 type. */
5491 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5493 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5495 if (dump_enabled_p ())
5496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5497 "not supported operation on bool value.\n");
5498 return false;
5500 vectype = vectype_out;
5502 else
5503 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5505 if (vec_stmt)
5506 gcc_assert (vectype);
5507 if (!vectype)
5509 if (dump_enabled_p ())
5511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5512 "no vectype for scalar type ");
5513 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5514 TREE_TYPE (op0));
5515 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5518 return false;
5521 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5522 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5523 if (maybe_ne (nunits_out, nunits_in))
5524 return false;
5526 if (op_type == binary_op || op_type == ternary_op)
5528 op1 = gimple_assign_rhs2 (stmt);
5529 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5531 if (dump_enabled_p ())
5532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5533 "use not simple.\n");
5534 return false;
5537 if (op_type == ternary_op)
5539 op2 = gimple_assign_rhs3 (stmt);
5540 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5542 if (dump_enabled_p ())
5543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5544 "use not simple.\n");
5545 return false;
5549 /* Multiple types in SLP are handled by creating the appropriate number of
5550 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5551 case of SLP. */
5552 if (slp_node)
5553 ncopies = 1;
5554 else
5555 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5557 gcc_assert (ncopies >= 1);
5559 /* Shifts are handled in vectorizable_shift (). */
5560 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5561 || code == RROTATE_EXPR)
5562 return false;
5564 /* Supportable by target? */
5566 vec_mode = TYPE_MODE (vectype);
5567 if (code == MULT_HIGHPART_EXPR)
5568 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5569 else
5571 optab = optab_for_tree_code (code, vectype, optab_default);
5572 if (!optab)
5574 if (dump_enabled_p ())
5575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5576 "no optab.\n");
5577 return false;
5579 target_support_p = (optab_handler (optab, vec_mode)
5580 != CODE_FOR_nothing);
5583 if (!target_support_p)
5585 if (dump_enabled_p ())
5586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5587 "op not supported by target.\n");
5588 /* Check only during analysis. */
5589 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5590 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5591 return false;
5592 if (dump_enabled_p ())
5593 dump_printf_loc (MSG_NOTE, vect_location,
5594 "proceeding using word mode.\n");
5597 /* Worthwhile without SIMD support? Check only during analysis. */
5598 if (!VECTOR_MODE_P (vec_mode)
5599 && !vec_stmt
5600 && !vect_worthwhile_without_simd_p (vinfo, code))
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "not worthwhile without SIMD support.\n");
5605 return false;
5608 if (!vec_stmt) /* transformation not required. */
5610 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_NOTE, vect_location,
5613 "=== vectorizable_operation ===\n");
5614 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5615 return true;
5618 /* Transform. */
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_NOTE, vect_location,
5622 "transform binary/unary operation.\n");
5624 /* Handle def. */
5625 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5627 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5628 vectors with unsigned elements, but the result is signed. So, we
5629 need to compute the MINUS_EXPR into vectype temporary and
5630 VIEW_CONVERT_EXPR it into the final vectype_out result. */
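  /* For example (an illustrative sketch only): for a scalar
       ptrdiff_t d = p - q;
     vectorized with four-element vectors, the code below emits roughly
       vect_tmp = vect_p - vect_q;                      // unsigned VECTYPE
       vect_d = VIEW_CONVERT_EXPR<VECTYPE_OUT> (vect_tmp);
     so the subtraction itself happens in the unsigned pointer vectype.  */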
5631 tree vec_cvt_dest = NULL_TREE;
5632 if (orig_code == POINTER_DIFF_EXPR)
5633 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5635 /* In case the vectorization factor (VF) is bigger than the number
5636 of elements that we can fit in a vectype (nunits), we have to generate
5637 more than one vector stmt - i.e - we need to "unroll" the
5638 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5639 from one copy of the vector stmt to the next, in the field
5640 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5641 stages to find the correct vector defs to be used when vectorizing
5642 stmts that use the defs of the current stmt. The example below
5643 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5644 we need to create 4 vectorized stmts):
5646 before vectorization:
5647 RELATED_STMT VEC_STMT
5648 S1: x = memref - -
5649 S2: z = x + 1 - -
5651 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5652 there):
5653 RELATED_STMT VEC_STMT
5654 VS1_0: vx0 = memref0 VS1_1 -
5655 VS1_1: vx1 = memref1 VS1_2 -
5656 VS1_2: vx2 = memref2 VS1_3 -
5657 VS1_3: vx3 = memref3 - -
5658 S1: x = load - VS1_0
5659 S2: z = x + 1 - -
5661 step2: vectorize stmt S2 (done here):
5662 To vectorize stmt S2 we first need to find the relevant vector
5663 def for the first operand 'x'. This is, as usual, obtained from
5664 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5665 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5666 relevant vector def 'vx0'. Having found 'vx0' we can generate
5667 the vector stmt VS2_0, and as usual, record it in the
5668 STMT_VINFO_VEC_STMT of stmt S2.
5669 When creating the second copy (VS2_1), we obtain the relevant vector
5670 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5671 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5672 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5673 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5674 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5675 chain of stmts and pointers:
5676 RELATED_STMT VEC_STMT
5677 VS1_0: vx0 = memref0 VS1_1 -
5678 VS1_1: vx1 = memref1 VS1_2 -
5679 VS1_2: vx2 = memref2 VS1_3 -
5680 VS1_3: vx3 = memref3 - -
5681 S1: x = load - VS1_0
5682 VS2_0: vz0 = vx0 + v1 VS2_1 -
5683 VS2_1: vz1 = vx1 + v1 VS2_2 -
5684 VS2_2: vz2 = vx2 + v1 VS2_3 -
5685 VS2_3: vz3 = vx3 + v1 - -
5686 S2: z = x + 1 - VS2_0 */
5688 prev_stmt_info = NULL;
5689 for (j = 0; j < ncopies; j++)
5691 /* Handle uses. */
5692 if (j == 0)
5694 if (op_type == binary_op || op_type == ternary_op)
5695 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5696 slp_node);
5697 else
5698 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5699 slp_node);
5700 if (op_type == ternary_op)
5701 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5702 slp_node);
5704 else
5706 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5707 if (op_type == ternary_op)
5709 tree vec_oprnd = vec_oprnds2.pop ();
5710 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5711 vec_oprnd));
5715 /* Arguments are ready. Create the new vector stmt. */
5716 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5718 vop1 = ((op_type == binary_op || op_type == ternary_op)
5719 ? vec_oprnds1[i] : NULL_TREE);
5720 vop2 = ((op_type == ternary_op)
5721 ? vec_oprnds2[i] : NULL_TREE);
5722 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5723 new_temp = make_ssa_name (vec_dest, new_stmt);
5724 gimple_assign_set_lhs (new_stmt, new_temp);
5725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5726 if (vec_cvt_dest)
5728 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5729 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5730 new_temp);
5731 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5732 gimple_assign_set_lhs (new_stmt, new_temp);
5733 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5735 if (slp_node)
5736 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5739 if (slp_node)
5740 continue;
5742 if (j == 0)
5743 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5744 else
5745 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5746 prev_stmt_info = vinfo_for_stmt (new_stmt);
5749 vec_oprnds0.release ();
5750 vec_oprnds1.release ();
5751 vec_oprnds2.release ();
5753 return true;
5756 /* A helper function to ensure data reference DR's base alignment. */
5758 static void
5759 ensure_base_align (struct data_reference *dr)
5761 if (!dr->aux)
5762 return;
5764 if (DR_VECT_AUX (dr)->base_misaligned)
5766 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5768 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5770 if (decl_in_symtab_p (base_decl))
5771 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5772 else
5774 SET_DECL_ALIGN (base_decl, align_base_to);
5775 DECL_USER_ALIGN (base_decl) = 1;
5777 DR_VECT_AUX (dr)->base_misaligned = false;
5782 /* Function get_group_alias_ptr_type.
5784 Return the alias type for the group starting at FIRST_STMT. */
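/* If the members of the group do not all have the same alias set, this
   conservatively returns ptr_type_node, i.e. the vectorized accesses are
   treated as potentially aliasing anything.  */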
5786 static tree
5787 get_group_alias_ptr_type (gimple *first_stmt)
5789 struct data_reference *first_dr, *next_dr;
5790 gimple *next_stmt;
5792 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5793 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5794 while (next_stmt)
5796 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5797 if (get_alias_set (DR_REF (first_dr))
5798 != get_alias_set (DR_REF (next_dr)))
5800 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_NOTE, vect_location,
5802 "conflicting alias set types.\n");
5803 return ptr_type_node;
5805 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5807 return reference_alias_ptr_type (DR_REF (first_dr));
5811 /* Function vectorizable_store.
 5813    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5814 can be vectorized.
5815 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5816 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5817 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5819 static bool
5820 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5821 slp_tree slp_node)
5823 tree data_ref;
5824 tree op;
5825 tree vec_oprnd = NULL_TREE;
5826 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5827 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5828 tree elem_type;
5829 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5830 struct loop *loop = NULL;
5831 machine_mode vec_mode;
5832 tree dummy;
5833 enum dr_alignment_support alignment_support_scheme;
5834 gimple *def_stmt;
5835 enum vect_def_type dt;
5836 stmt_vec_info prev_stmt_info = NULL;
5837 tree dataref_ptr = NULL_TREE;
5838 tree dataref_offset = NULL_TREE;
5839 gimple *ptr_incr = NULL;
5840 int ncopies;
5841 int j;
5842 gimple *next_stmt, *first_stmt;
5843 bool grouped_store;
5844 unsigned int group_size, i;
5845 vec<tree> oprnds = vNULL;
5846 vec<tree> result_chain = vNULL;
5847 bool inv_p;
5848 tree offset = NULL_TREE;
5849 vec<tree> vec_oprnds = vNULL;
5850 bool slp = (slp_node != NULL);
5851 unsigned int vec_num;
5852 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5853 vec_info *vinfo = stmt_info->vinfo;
5854 tree aggr_type;
5855 gather_scatter_info gs_info;
5856 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5857 gimple *new_stmt;
5858 poly_uint64 vf;
5859 vec_load_store_type vls_type;
5860 tree ref_type;
5862 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5863 return false;
5865 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5866 && ! vec_stmt)
5867 return false;
5869 /* Is vectorizable store? */
5871 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5872 if (is_gimple_assign (stmt))
5874 tree scalar_dest = gimple_assign_lhs (stmt);
5875 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5876 && is_pattern_stmt_p (stmt_info))
5877 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5878 if (TREE_CODE (scalar_dest) != ARRAY_REF
5879 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5880 && TREE_CODE (scalar_dest) != INDIRECT_REF
5881 && TREE_CODE (scalar_dest) != COMPONENT_REF
5882 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5883 && TREE_CODE (scalar_dest) != REALPART_EXPR
5884 && TREE_CODE (scalar_dest) != MEM_REF)
5885 return false;
5887 else
5889 gcall *call = dyn_cast <gcall *> (stmt);
5890 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5891 return false;
5893 if (slp_node != NULL)
5895 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5897 "SLP of masked stores not supported.\n");
5898 return false;
5901 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5902 mask = gimple_call_arg (call, 2);
5903 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5904 return false;
5907 op = vect_get_store_rhs (stmt);
5909 /* Cannot have hybrid store SLP -- that would mean storing to the
5910 same location twice. */
5911 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5913 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5914 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5916 if (loop_vinfo)
5918 loop = LOOP_VINFO_LOOP (loop_vinfo);
5919 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5921 else
5922 vf = 1;
5924 /* Multiple types in SLP are handled by creating the appropriate number of
5925 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5926 case of SLP. */
5927 if (slp)
5928 ncopies = 1;
5929 else
5930 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5932 gcc_assert (ncopies >= 1);
5934 /* FORNOW. This restriction should be relaxed. */
5935 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5937 if (dump_enabled_p ())
5938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5939 "multiple types in nested loop.\n");
5940 return false;
5943 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
5944 return false;
5946 elem_type = TREE_TYPE (vectype);
5947 vec_mode = TYPE_MODE (vectype);
5949 if (!STMT_VINFO_DATA_REF (stmt_info))
5950 return false;
5952 vect_memory_access_type memory_access_type;
5953 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
5954 &memory_access_type, &gs_info))
5955 return false;
5957 if (mask)
5959 if (memory_access_type == VMAT_CONTIGUOUS)
5961 if (!VECTOR_MODE_P (vec_mode)
5962 || !can_vec_mask_load_store_p (vec_mode,
5963 TYPE_MODE (mask_vectype), false))
5964 return false;
5966 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "unsupported access type for masked store.\n");
5971 return false;
5974 else
 5976       /* FORNOW. In some cases we can vectorize even if the data type is not
 5977	 supported (e.g. array initialization with 0). */
5978 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5979 return false;
5982 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5983 if (grouped_store)
5985 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5986 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5987 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5989 else
5991 first_stmt = stmt;
5992 first_dr = dr;
5993 group_size = vec_num = 1;
5996 if (!vec_stmt) /* transformation not required. */
5998 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6000 if (loop_vinfo
6001 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6002 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6003 memory_access_type, &gs_info);
6005 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6006 /* The SLP costs are calculated during SLP analysis. */
6007 if (!PURE_SLP_STMT (stmt_info))
6008 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6009 vls_type, NULL, NULL, NULL);
6010 return true;
6012 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6014 /* Transform. */
6016 ensure_base_align (dr);
6018 if (memory_access_type == VMAT_GATHER_SCATTER)
6020 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6021 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6022 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6023 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6024 edge pe = loop_preheader_edge (loop);
6025 gimple_seq seq;
6026 basic_block new_bb;
6027 enum { NARROW, NONE, WIDEN } modifier;
6028 poly_uint64 scatter_off_nunits
6029 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6031 if (known_eq (nunits, scatter_off_nunits))
6032 modifier = NONE;
6033 else if (known_eq (nunits * 2, scatter_off_nunits))
6035 modifier = WIDEN;
6037 /* Currently gathers and scatters are only supported for
6038 fixed-length vectors. */
6039 unsigned int count = scatter_off_nunits.to_constant ();
6040 vec_perm_builder sel (count, count, 1);
6041 for (i = 0; i < (unsigned int) count; ++i)
6042 sel.quick_push (i | (count / 2));
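	  /* For example, for COUNT == 8 this builds the selector
	     { 4, 5, 6, 7, 4, 5, 6, 7 }: the second half of the offset
	     vector, which the odd-numbered copies below use to pick up
	     the remaining offsets.  */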
6044 vec_perm_indices indices (sel, 1, count);
6045 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6046 indices);
6047 gcc_assert (perm_mask != NULL_TREE);
6049 else if (known_eq (nunits, scatter_off_nunits * 2))
6051 modifier = NARROW;
6053 /* Currently gathers and scatters are only supported for
6054 fixed-length vectors. */
6055 unsigned int count = nunits.to_constant ();
6056 vec_perm_builder sel (count, count, 1);
6057 for (i = 0; i < (unsigned int) count; ++i)
6058 sel.quick_push (i | (count / 2));
6060 vec_perm_indices indices (sel, 2, count);
6061 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6062 gcc_assert (perm_mask != NULL_TREE);
6063 ncopies *= 2;
6065 else
6066 gcc_unreachable ();
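      /* Pull apart the scatter builtin's prototype, which as used here
	 takes the form void (base pointer, mask, index vector, source
	 vector, scale), so that the operands can be converted to the
	 types the builtin expects.  */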
6068 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6069 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6070 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6071 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6072 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6073 scaletype = TREE_VALUE (arglist);
6075 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6076 && TREE_CODE (rettype) == VOID_TYPE);
6078 ptr = fold_convert (ptrtype, gs_info.base);
6079 if (!is_gimple_min_invariant (ptr))
6081 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6082 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6083 gcc_assert (!new_bb);
6086 /* Currently we support only unconditional scatter stores,
6087 so mask should be all ones. */
6088 mask = build_int_cst (masktype, -1);
6089 mask = vect_init_vector (stmt, mask, masktype, NULL);
6091 scale = build_int_cst (scaletype, gs_info.scale);
6093 prev_stmt_info = NULL;
6094 for (j = 0; j < ncopies; ++j)
6096 if (j == 0)
6098 src = vec_oprnd1
6099 = vect_get_vec_def_for_operand (op, stmt);
6100 op = vec_oprnd0
6101 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6103 else if (modifier != NONE && (j & 1))
6105 if (modifier == WIDEN)
6107 src = vec_oprnd1
6108 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6109 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6110 stmt, gsi);
6112 else if (modifier == NARROW)
6114 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6115 stmt, gsi);
6116 op = vec_oprnd0
6117 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6118 vec_oprnd0);
6120 else
6121 gcc_unreachable ();
6123 else
6125 src = vec_oprnd1
6126 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6127 op = vec_oprnd0
6128 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6129 vec_oprnd0);
6132 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6134 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6135 TYPE_VECTOR_SUBPARTS (srctype)));
6136 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6137 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6138 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6140 src = var;
6143 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6145 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6146 TYPE_VECTOR_SUBPARTS (idxtype)));
6147 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6148 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6149 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6151 op = var;
6154 new_stmt
6155 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6159 if (prev_stmt_info == NULL)
6160 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6161 else
6162 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6163 prev_stmt_info = vinfo_for_stmt (new_stmt);
6165 return true;
6168 if (grouped_store)
6170 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
6172 /* FORNOW */
6173 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6175 /* We vectorize all the stmts of the interleaving group when we
6176 reach the last stmt in the group. */
6177 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6178 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6179 && !slp)
6181 *vec_stmt = NULL;
6182 return true;
6185 if (slp)
6187 grouped_store = false;
6188 /* VEC_NUM is the number of vect stmts to be created for this
6189 group. */
6190 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6191 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6192 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6193 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6194 op = vect_get_store_rhs (first_stmt);
6196 else
6197 /* VEC_NUM is the number of vect stmts to be created for this
6198 group. */
6199 vec_num = group_size;
6201 ref_type = get_group_alias_ptr_type (first_stmt);
6203 else
6204 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6206 if (dump_enabled_p ())
6207 dump_printf_loc (MSG_NOTE, vect_location,
6208 "transform store. ncopies = %d\n", ncopies);
6210 if (memory_access_type == VMAT_ELEMENTWISE
6211 || memory_access_type == VMAT_STRIDED_SLP)
6213 gimple_stmt_iterator incr_gsi;
6214 bool insert_after;
6215 gimple *incr;
6216 tree offvar;
6217 tree ivstep;
6218 tree running_off;
6219 gimple_seq stmts = NULL;
6220 tree stride_base, stride_step, alias_off;
6221 tree vec_oprnd;
6222 unsigned int g;
6223 /* Checked by get_load_store_type. */
6224 unsigned int const_nunits = nunits.to_constant ();
6226 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6227 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6229 stride_base
6230 = fold_build_pointer_plus
6231 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6232 size_binop (PLUS_EXPR,
6233 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6234 convert_to_ptrofftype (DR_INIT (first_dr))));
6235 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6237 /* For a store with loop-invariant (but other than power-of-2)
6238 stride (i.e. not a grouped access) like so:
6240 for (i = 0; i < n; i += stride)
6241 array[i] = ...;
6243 we generate a new induction variable and new stores from
6244 the components of the (vectorized) rhs:
6246 for (j = 0; ; j += VF*stride)
6247 vectemp = ...;
6248 tmp1 = vectemp[0];
6249 array[j] = tmp1;
6250 tmp2 = vectemp[1];
6251 array[j + stride] = tmp2;
6255 unsigned nstores = const_nunits;
6256 unsigned lnel = 1;
6257 tree ltype = elem_type;
6258 tree lvectype = vectype;
6259 if (slp)
6261 if (group_size < const_nunits
6262 && const_nunits % group_size == 0)
6264 nstores = const_nunits / group_size;
6265 lnel = group_size;
6266 ltype = build_vector_type (elem_type, group_size);
6267 lvectype = vectype;
6269 /* First check if vec_extract optab doesn't support extraction
6270 of vector elts directly. */
6271 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6272 machine_mode vmode;
6273 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6274 || !VECTOR_MODE_P (vmode)
6275 || (convert_optab_handler (vec_extract_optab,
6276 TYPE_MODE (vectype), vmode)
6277 == CODE_FOR_nothing))
6279 /* Try to avoid emitting an extract of vector elements
6280 by performing the extracts using an integer type of the
6281 same size, extracting from a vector of those and then
6282 re-interpreting it as the original vector type if
6283 supported. */
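	      /* For instance (illustrative only): a group of two SImode
		 elements stored from a V4SImode vector can be handled as
		 DImode extracts from the same value viewed as V2DImode,
		 halving the number of scalar stores.  */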
6284 unsigned lsize
6285 = group_size * GET_MODE_BITSIZE (elmode);
6286 elmode = int_mode_for_size (lsize, 0).require ();
6287 unsigned int lnunits = const_nunits / group_size;
6288 /* If we can't construct such a vector fall back to
6289 element extracts from the original vector type and
6290 element size stores. */
6291 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6292 && VECTOR_MODE_P (vmode)
6293 && (convert_optab_handler (vec_extract_optab,
6294 vmode, elmode)
6295 != CODE_FOR_nothing))
6297 nstores = lnunits;
6298 lnel = group_size;
6299 ltype = build_nonstandard_integer_type (lsize, 1);
6300 lvectype = build_vector_type (ltype, nstores);
6302 /* Else fall back to vector extraction anyway.
6303 Fewer stores are more important than avoiding spilling
6304 of the vector we extract from. Compared to the
 6305		 construction case in vectorizable_load, no store-forwarding
6306 issue exists here for reasonable archs. */
6309 else if (group_size >= const_nunits
6310 && group_size % const_nunits == 0)
6312 nstores = 1;
6313 lnel = const_nunits;
6314 ltype = vectype;
6315 lvectype = vectype;
6317 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6318 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6321 ivstep = stride_step;
6322 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6323 build_int_cst (TREE_TYPE (ivstep), vf));
6325 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6327 create_iv (stride_base, ivstep, NULL,
6328 loop, &incr_gsi, insert_after,
6329 &offvar, NULL);
6330 incr = gsi_stmt (incr_gsi);
6331 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6333 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6334 if (stmts)
6335 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6337 prev_stmt_info = NULL;
6338 alias_off = build_int_cst (ref_type, 0);
6339 next_stmt = first_stmt;
6340 for (g = 0; g < group_size; g++)
6342 running_off = offvar;
6343 if (g)
6345 tree size = TYPE_SIZE_UNIT (ltype);
6346 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6347 size);
6348 tree newoff = copy_ssa_name (running_off, NULL);
6349 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6350 running_off, pos);
6351 vect_finish_stmt_generation (stmt, incr, gsi);
6352 running_off = newoff;
6354 unsigned int group_el = 0;
6355 unsigned HOST_WIDE_INT
6356 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6357 for (j = 0; j < ncopies; j++)
6359 /* We've set op and dt above, from vect_get_store_rhs,
6360 and first_stmt == stmt. */
6361 if (j == 0)
6363 if (slp)
6365 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6366 slp_node);
6367 vec_oprnd = vec_oprnds[0];
6369 else
6371 op = vect_get_store_rhs (next_stmt);
6372 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6375 else
6377 if (slp)
6378 vec_oprnd = vec_oprnds[j];
6379 else
6381 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6382 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6385 /* Pun the vector to extract from if necessary. */
6386 if (lvectype != vectype)
6388 tree tem = make_ssa_name (lvectype);
6389 gimple *pun
6390 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6391 lvectype, vec_oprnd));
6392 vect_finish_stmt_generation (stmt, pun, gsi);
6393 vec_oprnd = tem;
6395 for (i = 0; i < nstores; i++)
6397 tree newref, newoff;
6398 gimple *incr, *assign;
6399 tree size = TYPE_SIZE (ltype);
6400 /* Extract the i'th component. */
6401 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6402 bitsize_int (i), size);
6403 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6404 size, pos);
6406 elem = force_gimple_operand_gsi (gsi, elem, true,
6407 NULL_TREE, true,
6408 GSI_SAME_STMT);
6410 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6411 group_el * elsz);
6412 newref = build2 (MEM_REF, ltype,
6413 running_off, this_off);
6415 /* And store it to *running_off. */
6416 assign = gimple_build_assign (newref, elem);
6417 vect_finish_stmt_generation (stmt, assign, gsi);
6419 group_el += lnel;
6420 if (! slp
6421 || group_el == group_size)
6423 newoff = copy_ssa_name (running_off, NULL);
6424 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6425 running_off, stride_step);
6426 vect_finish_stmt_generation (stmt, incr, gsi);
6428 running_off = newoff;
6429 group_el = 0;
6431 if (g == group_size - 1
6432 && !slp)
6434 if (j == 0 && i == 0)
6435 STMT_VINFO_VEC_STMT (stmt_info)
6436 = *vec_stmt = assign;
6437 else
6438 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6439 prev_stmt_info = vinfo_for_stmt (assign);
6443 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6444 if (slp)
6445 break;
6448 vec_oprnds.release ();
6449 return true;
6452 auto_vec<tree> dr_chain (group_size);
6453 oprnds.create (group_size);
6455 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6456 gcc_assert (alignment_support_scheme);
6457 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6458 /* Targets with store-lane instructions must not require explicit
6459 realignment. vect_supportable_dr_alignment always returns either
6460 dr_aligned or dr_unaligned_supported for masked operations. */
6461 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6462 && !mask
6463 && !masked_loop_p)
6464 || alignment_support_scheme == dr_aligned
6465 || alignment_support_scheme == dr_unaligned_supported);
6467 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6468 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6469 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6471 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6472 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6473 else
6474 aggr_type = vectype;
6476 if (mask)
6477 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6479 /* In case the vectorization factor (VF) is bigger than the number
6480 of elements that we can fit in a vectype (nunits), we have to generate
6481 more than one vector stmt - i.e - we need to "unroll" the
6482 vector stmt by a factor VF/nunits. For more details see documentation in
6483 vect_get_vec_def_for_copy_stmt. */
6485 /* In case of interleaving (non-unit grouped access):
6487 S1: &base + 2 = x2
6488 S2: &base = x0
6489 S3: &base + 1 = x1
6490 S4: &base + 3 = x3
6492 We create vectorized stores starting from base address (the access of the
6493 first stmt in the chain (S2 in the above example), when the last store stmt
6494 of the chain (S4) is reached:
6496 VS1: &base = vx2
6497 VS2: &base + vec_size*1 = vx0
6498 VS3: &base + vec_size*2 = vx1
6499 VS4: &base + vec_size*3 = vx3
6501 Then permutation statements are generated:
6503 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6504 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6507 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6508 (the order of the data-refs in the output of vect_permute_store_chain
6509 corresponds to the order of scalar stmts in the interleaving chain - see
6510 the documentation of vect_permute_store_chain()).
6512 In case of both multiple types and interleaving, above vector stores and
6513 permutation stmts are created for every copy. The result vector stmts are
6514 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6515 STMT_VINFO_RELATED_STMT for the next copies.
6518 prev_stmt_info = NULL;
6519 tree vec_mask = NULL_TREE;
6520 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
6521 for (j = 0; j < ncopies; j++)
6524 if (j == 0)
6526 if (slp)
6528 /* Get vectorized arguments for SLP_NODE. */
6529 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6530 NULL, slp_node);
6532 vec_oprnd = vec_oprnds[0];
6534 else
6536 /* For interleaved stores we collect vectorized defs for all the
6537 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6538 used as an input to vect_permute_store_chain(), and OPRNDS as
6539 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6541 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6542 OPRNDS are of size 1. */
6543 next_stmt = first_stmt;
6544 for (i = 0; i < group_size; i++)
6546 /* Since gaps are not supported for interleaved stores,
6547 GROUP_SIZE is the exact number of stmts in the chain.
6548 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6549 there is no interleaving, GROUP_SIZE is 1, and only one
6550 iteration of the loop will be executed. */
6551 op = vect_get_store_rhs (next_stmt);
6552 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6553 dr_chain.quick_push (vec_oprnd);
6554 oprnds.quick_push (vec_oprnd);
6555 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6557 if (mask)
6558 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6559 mask_vectype);
 6562	  /* We should have caught mismatched types earlier. */
6563 gcc_assert (useless_type_conversion_p (vectype,
6564 TREE_TYPE (vec_oprnd)));
6565 bool simd_lane_access_p
6566 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6567 if (simd_lane_access_p
6568 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6569 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6570 && integer_zerop (DR_OFFSET (first_dr))
6571 && integer_zerop (DR_INIT (first_dr))
6572 && alias_sets_conflict_p (get_alias_set (aggr_type),
6573 get_alias_set (TREE_TYPE (ref_type))))
6575 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6576 dataref_offset = build_int_cst (ref_type, 0);
6577 inv_p = false;
6579 else
6580 dataref_ptr
6581 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6582 simd_lane_access_p ? loop : NULL,
6583 offset, &dummy, gsi, &ptr_incr,
6584 simd_lane_access_p, &inv_p);
6585 gcc_assert (bb_vinfo || !inv_p);
6587 else
6589 /* For interleaved stores we created vectorized defs for all the
6590 defs stored in OPRNDS in the previous iteration (previous copy).
6591 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6592 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6593 next copy.
6594 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6595 OPRNDS are of size 1. */
6596 for (i = 0; i < group_size; i++)
6598 op = oprnds[i];
6599 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6600 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6601 dr_chain[i] = vec_oprnd;
6602 oprnds[i] = vec_oprnd;
6604 if (mask)
6606 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6607 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6609 if (dataref_offset)
6610 dataref_offset
6611 = int_const_binop (PLUS_EXPR, dataref_offset,
6612 TYPE_SIZE_UNIT (aggr_type));
6613 else
6614 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6615 TYPE_SIZE_UNIT (aggr_type));
6618 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6620 tree vec_array;
6622 /* Combine all the vectors into an array. */
6623 vec_array = create_vector_array (vectype, vec_num);
6624 for (i = 0; i < vec_num; i++)
6626 vec_oprnd = dr_chain[i];
6627 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
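	  /* If the loop is fully masked, fetch the loop mask for this copy;
	     if the store is itself a masked store, combine it with that
	     loop mask before emitting the call.  */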
6630 tree final_mask = NULL;
6631 if (masked_loop_p)
6632 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
6633 if (vec_mask)
6634 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6635 vec_mask, gsi);
6637 gcall *call;
6638 if (final_mask)
6640 /* Emit:
6641 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6642 VEC_ARRAY). */
6643 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6644 tree alias_ptr = build_int_cst (ref_type, align);
6645 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6646 dataref_ptr, alias_ptr,
6647 final_mask, vec_array);
6649 else
6651 /* Emit:
6652 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6653 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6654 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6655 vec_array);
6656 gimple_call_set_lhs (call, data_ref);
6658 gimple_call_set_nothrow (call, true);
6659 new_stmt = call;
6660 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6662 else
6664 new_stmt = NULL;
6665 if (grouped_store)
6667 if (j == 0)
6668 result_chain.create (group_size);
6669 /* Permute. */
6670 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6671 &result_chain);
6674 next_stmt = first_stmt;
6675 for (i = 0; i < vec_num; i++)
6677 unsigned align, misalign;
6679 tree final_mask = NULL_TREE;
6680 if (masked_loop_p)
6681 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6682 vectype, vec_num * j + i);
6683 if (vec_mask)
6684 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6685 vec_mask, gsi);
6687 if (i > 0)
6688 /* Bump the vector pointer. */
6689 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6690 stmt, NULL_TREE);
6692 if (slp)
6693 vec_oprnd = vec_oprnds[i];
6694 else if (grouped_store)
6695 /* For grouped stores vectorized defs are interleaved in
6696 vect_permute_store_chain(). */
6697 vec_oprnd = result_chain[i];
6699 align = DR_TARGET_ALIGNMENT (first_dr);
6700 if (aligned_access_p (first_dr))
6701 misalign = 0;
6702 else if (DR_MISALIGNMENT (first_dr) == -1)
6704 align = dr_alignment (vect_dr_behavior (first_dr));
6705 misalign = 0;
6707 else
6708 misalign = DR_MISALIGNMENT (first_dr);
6709 if (dataref_offset == NULL_TREE
6710 && TREE_CODE (dataref_ptr) == SSA_NAME)
6711 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6712 misalign);
6714 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6716 tree perm_mask = perm_mask_for_reverse (vectype);
6717 tree perm_dest
6718 = vect_create_destination_var (vect_get_store_rhs (stmt),
6719 vectype);
6720 tree new_temp = make_ssa_name (perm_dest);
6722 /* Generate the permute statement. */
6723 gimple *perm_stmt
6724 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6725 vec_oprnd, perm_mask);
6726 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6728 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6729 vec_oprnd = new_temp;
6732 /* Arguments are ready. Create the new vector stmt. */
6733 if (final_mask)
6735 align = least_bit_hwi (misalign | align);
6736 tree ptr = build_int_cst (ref_type, align);
6737 gcall *call
6738 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6739 dataref_ptr, ptr,
6740 final_mask, vec_oprnd);
6741 gimple_call_set_nothrow (call, true);
6742 new_stmt = call;
6744 else
6746 data_ref = fold_build2 (MEM_REF, vectype,
6747 dataref_ptr,
6748 dataref_offset
6749 ? dataref_offset
6750 : build_int_cst (ref_type, 0));
6751 if (aligned_access_p (first_dr))
6753 else if (DR_MISALIGNMENT (first_dr) == -1)
6754 TREE_TYPE (data_ref)
6755 = build_aligned_type (TREE_TYPE (data_ref),
6756 align * BITS_PER_UNIT);
6757 else
6758 TREE_TYPE (data_ref)
6759 = build_aligned_type (TREE_TYPE (data_ref),
6760 TYPE_ALIGN (elem_type));
6761 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6765 if (slp)
6766 continue;
6768 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6769 if (!next_stmt)
6770 break;
6773 if (!slp)
6775 if (j == 0)
6776 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6777 else
6778 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6779 prev_stmt_info = vinfo_for_stmt (new_stmt);
6783 oprnds.release ();
6784 result_chain.release ();
6785 vec_oprnds.release ();
6787 return true;
6790 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6791 VECTOR_CST mask. No checks are made that the target platform supports the
6792 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6793 vect_gen_perm_mask_checked. */
6795 tree
6796 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6798 tree mask_type;
6800 poly_uint64 nunits = sel.length ();
6801 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
6803 mask_type = build_vector_type (ssizetype, nunits);
6804 return vec_perm_indices_to_tree (mask_type, sel);
6807 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6808 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6810 tree
6811 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6813 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6814 return vect_gen_perm_mask_any (vectype, sel);
 6817 /* Given vector variables X and Y that were generated for the scalar
 6818    STMT, generate instructions to permute the vector elements of X and Y
6819 using permutation mask MASK_VEC, insert them at *GSI and return the
6820 permuted vector variable. */
6822 static tree
6823 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6824 gimple_stmt_iterator *gsi)
6826 tree vectype = TREE_TYPE (x);
6827 tree perm_dest, data_ref;
6828 gimple *perm_stmt;
6830 tree scalar_dest = gimple_get_lhs (stmt);
6831 if (TREE_CODE (scalar_dest) == SSA_NAME)
6832 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6833 else
6834 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
6835 data_ref = make_ssa_name (perm_dest);
6837 /* Generate the permute statement. */
6838 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6839 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6841 return data_ref;
6844 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6845    inserting them on the loop's preheader edge. Returns true if we
 6846    were successful in doing so (and thus STMT can then be moved),
6847 otherwise returns false. */
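/* For example (an illustrative sketch): if STMT is an invariant load
   "x = *p_1" inside LOOP and p_1 is defined inside LOOP by a stmt whose
   own operands are all defined outside of it, that defining stmt is moved
   to the preheader so that STMT itself can then be hoisted as well.  */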
6849 static bool
6850 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6852 ssa_op_iter i;
6853 tree op;
6854 bool any = false;
6856 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6858 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6859 if (!gimple_nop_p (def_stmt)
6860 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
 6862	    /* Make sure we don't need to recurse. While we could do
 6863	       so in simple cases, for more complex use webs we don't
 6864	       have an easy way to preserve stmt order to fulfil
 6865	       dependencies within them. */
6866 tree op2;
6867 ssa_op_iter i2;
6868 if (gimple_code (def_stmt) == GIMPLE_PHI)
6869 return false;
6870 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6872 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6873 if (!gimple_nop_p (def_stmt2)
6874 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6875 return false;
6877 any = true;
6881 if (!any)
6882 return true;
6884 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6886 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6887 if (!gimple_nop_p (def_stmt)
6888 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6890 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6891 gsi_remove (&gsi, false);
6892 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6896 return true;
6899 /* vectorizable_load.
 6901    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6902 can be vectorized.
6903 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6904 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6905 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6907 static bool
6908 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6909 slp_tree slp_node, slp_instance slp_node_instance)
6911 tree scalar_dest;
6912 tree vec_dest = NULL;
6913 tree data_ref = NULL;
6914 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6915 stmt_vec_info prev_stmt_info;
6916 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6917 struct loop *loop = NULL;
6918 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6919 bool nested_in_vect_loop = false;
6920 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6921 tree elem_type;
6922 tree new_temp;
6923 machine_mode mode;
6924 gimple *new_stmt = NULL;
6925 tree dummy;
6926 enum dr_alignment_support alignment_support_scheme;
6927 tree dataref_ptr = NULL_TREE;
6928 tree dataref_offset = NULL_TREE;
6929 gimple *ptr_incr = NULL;
6930 int ncopies;
6931 int i, j;
6932 unsigned int group_size;
6933 poly_uint64 group_gap_adj;
6934 tree msq = NULL_TREE, lsq;
6935 tree offset = NULL_TREE;
6936 tree byte_offset = NULL_TREE;
6937 tree realignment_token = NULL_TREE;
6938 gphi *phi = NULL;
6939 vec<tree> dr_chain = vNULL;
6940 bool grouped_load = false;
6941 gimple *first_stmt;
6942 gimple *first_stmt_for_drptr = NULL;
6943 bool inv_p;
6944 bool compute_in_loop = false;
6945 struct loop *at_loop;
6946 int vec_num;
6947 bool slp = (slp_node != NULL);
6948 bool slp_perm = false;
6949 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6950 poly_uint64 vf;
6951 tree aggr_type;
6952 gather_scatter_info gs_info;
6953 vec_info *vinfo = stmt_info->vinfo;
6954 tree ref_type;
6956 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6957 return false;
6959 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6960 && ! vec_stmt)
6961 return false;
6963 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6964 if (is_gimple_assign (stmt))
6966 scalar_dest = gimple_assign_lhs (stmt);
6967 if (TREE_CODE (scalar_dest) != SSA_NAME)
6968 return false;
6970 tree_code code = gimple_assign_rhs_code (stmt);
6971 if (code != ARRAY_REF
6972 && code != BIT_FIELD_REF
6973 && code != INDIRECT_REF
6974 && code != COMPONENT_REF
6975 && code != IMAGPART_EXPR
6976 && code != REALPART_EXPR
6977 && code != MEM_REF
6978 && TREE_CODE_CLASS (code) != tcc_declaration)
6979 return false;
6981 else
6983 gcall *call = dyn_cast <gcall *> (stmt);
6984 if (!call || !gimple_call_internal_p (call))
6985 return false;
6987 internal_fn ifn = gimple_call_internal_fn (call);
6988 if (!internal_load_fn_p (ifn))
6989 return false;
6991 scalar_dest = gimple_call_lhs (call);
6992 if (!scalar_dest)
6993 return false;
6995 if (slp_node != NULL)
6997 if (dump_enabled_p ())
6998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6999 "SLP of masked loads not supported.\n");
7000 return false;
7003 int mask_index = internal_fn_mask_index (ifn);
7004 if (mask_index >= 0)
7006 mask = gimple_call_arg (call, mask_index);
7007 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
7008 return false;
7012 if (!STMT_VINFO_DATA_REF (stmt_info))
7013 return false;
7015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7016 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7018 if (loop_vinfo)
7020 loop = LOOP_VINFO_LOOP (loop_vinfo);
7021 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7022 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7024 else
7025 vf = 1;
7027 /* Multiple types in SLP are handled by creating the appropriate number of
7028 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7029 case of SLP. */
7030 if (slp)
7031 ncopies = 1;
7032 else
7033 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7035 gcc_assert (ncopies >= 1);
7037 /* FORNOW. This restriction should be relaxed. */
7038 if (nested_in_vect_loop && ncopies > 1)
7040 if (dump_enabled_p ())
7041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7042 "multiple types in nested loop.\n");
7043 return false;
7046 /* Invalidate assumptions made by dependence analysis when vectorization
7047 on the unrolled body effectively re-orders stmts. */
7048 if (ncopies > 1
7049 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7050 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7051 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7053 if (dump_enabled_p ())
7054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7055 "cannot perform implicit CSE when unrolling "
7056 "with negative dependence distance\n");
7057 return false;
7060 elem_type = TREE_TYPE (vectype);
7061 mode = TYPE_MODE (vectype);
 7063   /* FORNOW. In some cases we can vectorize even if the data type is not
 7064      supported (e.g. data copies). */
7065 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7067 if (dump_enabled_p ())
7068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7069 "Aligned load, but unsupported type.\n");
7070 return false;
7073 /* Check if the load is a part of an interleaving chain. */
7074 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7076 grouped_load = true;
7077 /* FORNOW */
7078 gcc_assert (!nested_in_vect_loop);
7079 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7081 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7082 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7084 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7085 slp_perm = true;
7087 /* Invalidate assumptions made by dependence analysis when vectorization
7088 on the unrolled body effectively re-orders stmts. */
7089 if (!PURE_SLP_STMT (stmt_info)
7090 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7091 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7092 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7094 if (dump_enabled_p ())
7095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7096 "cannot perform implicit CSE when performing "
7097 "group loads with negative dependence distance\n");
7098 return false;
 7101       /* Similarly, when the stmt is a load that is both part of an SLP
 7102	 instance and a loop-vectorized stmt via the same-dr mechanism,
 7103	 we have to give up. */
7104 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7105 && (STMT_SLP_TYPE (stmt_info)
7106 != STMT_SLP_TYPE (vinfo_for_stmt
7107 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7109 if (dump_enabled_p ())
7110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7111 "conflicting SLP types for CSEd load\n");
7112 return false;
7115 else
7116 group_size = 1;
7118 vect_memory_access_type memory_access_type;
7119 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7120 &memory_access_type, &gs_info))
7121 return false;
7123 if (mask)
7125 if (memory_access_type == VMAT_CONTIGUOUS)
7127 machine_mode vec_mode = TYPE_MODE (vectype);
7128 if (!VECTOR_MODE_P (vec_mode)
7129 || !can_vec_mask_load_store_p (vec_mode,
7130 TYPE_MODE (mask_vectype), true))
7131 return false;
7133 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7135 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7136 tree masktype
7137 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7138 if (TREE_CODE (masktype) == INTEGER_TYPE)
7140 if (dump_enabled_p ())
7141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7142 "masked gather with integer mask not"
7143 " supported.");
7144 return false;
7147 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7148 && memory_access_type != VMAT_GATHER_SCATTER)
7150 if (dump_enabled_p ())
7151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7152 "unsupported access type for masked load.\n");
7153 return false;
7157 if (!vec_stmt) /* transformation not required. */
7159 if (!slp)
7160 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7162 if (loop_vinfo
7163 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7164 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7165 memory_access_type, &gs_info);
7167 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7168 /* The SLP costs are calculated during SLP analysis. */
7169 if (!PURE_SLP_STMT (stmt_info))
7170 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7171 NULL, NULL, NULL);
7172 return true;
7175 if (!slp)
7176 gcc_assert (memory_access_type
7177 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7179 if (dump_enabled_p ())
7180 dump_printf_loc (MSG_NOTE, vect_location,
7181 "transform load. ncopies = %d\n", ncopies);
7183 /* Transform. */
7185 ensure_base_align (dr);
7187 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7189 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
7190 return true;
7193 if (memory_access_type == VMAT_ELEMENTWISE
7194 || memory_access_type == VMAT_STRIDED_SLP)
7196 gimple_stmt_iterator incr_gsi;
7197 bool insert_after;
7198 gimple *incr;
7199 tree offvar;
7200 tree ivstep;
7201 tree running_off;
7202 vec<constructor_elt, va_gc> *v = NULL;
7203 gimple_seq stmts = NULL;
7204 tree stride_base, stride_step, alias_off;
7205 /* Checked by get_load_store_type. */
7206 unsigned int const_nunits = nunits.to_constant ();
7208 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7209 gcc_assert (!nested_in_vect_loop);
7211 if (slp && grouped_load)
7213 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7214 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7215 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7216 ref_type = get_group_alias_ptr_type (first_stmt);
7218 else
7220 first_stmt = stmt;
7221 first_dr = dr;
7222 group_size = 1;
7223 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7226 stride_base
7227 = fold_build_pointer_plus
7228 (DR_BASE_ADDRESS (first_dr),
7229 size_binop (PLUS_EXPR,
7230 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7231 convert_to_ptrofftype (DR_INIT (first_dr))));
7232 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7234 /* For a load with loop-invariant (but other than power-of-2)
7235 stride (i.e. not a grouped access) like so:
7237 for (i = 0; i < n; i += stride)
7238 ... = array[i];
7240 we generate a new induction variable and new accesses to
7241 form a new vector (or vectors, depending on ncopies):
7243 for (j = 0; ; j += VF*stride)
7244 tmp1 = array[j];
7245 tmp2 = array[j + stride];
7247 vectemp = {tmp1, tmp2, ...}
7250 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7251 build_int_cst (TREE_TYPE (stride_step), vf));
7253 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7255 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7256 loop, &incr_gsi, insert_after,
7257 &offvar, NULL);
7258 incr = gsi_stmt (incr_gsi);
7259 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7261 stride_step = force_gimple_operand (unshare_expr (stride_step),
7262 &stmts, true, NULL_TREE);
7263 if (stmts)
7264 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7266 prev_stmt_info = NULL;
7267 running_off = offvar;
7268 alias_off = build_int_cst (ref_type, 0);
7269 int nloads = const_nunits;
7270 int lnel = 1;
7271 tree ltype = TREE_TYPE (vectype);
7272 tree lvectype = vectype;
7273 auto_vec<tree> dr_chain;
7274 if (memory_access_type == VMAT_STRIDED_SLP)
7276 if (group_size < const_nunits)
7278 /* First check if vec_init optab supports construction from
7279 vector elts directly. */
7280 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7281 machine_mode vmode;
7282 if (mode_for_vector (elmode, group_size).exists (&vmode)
7283 && VECTOR_MODE_P (vmode)
7284 && (convert_optab_handler (vec_init_optab,
7285 TYPE_MODE (vectype), vmode)
7286 != CODE_FOR_nothing))
7288 nloads = const_nunits / group_size;
7289 lnel = group_size;
7290 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7292 else
7294 /* Otherwise avoid emitting a constructor of vector elements
7295 by performing the loads using an integer type of the same
7296 size, constructing a vector of those and then
7297 re-interpreting it as the original vector type.
7298 This avoids a huge runtime penalty due to the general
7299 inability to perform store forwarding from smaller stores
7300 to a larger load. */
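	      /* For instance (illustrative only): a group of two SImode
		 elements loaded into a V4SImode vector can instead be done
		 as two DImode loads, combined into a V2DImode vector and
		 then view-converted to the original V4SImode type.  */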
7301 unsigned lsize
7302 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7303 elmode = int_mode_for_size (lsize, 0).require ();
7304 unsigned int lnunits = const_nunits / group_size;
7305 /* If we can't construct such a vector fall back to
7306 element loads of the original vector type. */
7307 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7308 && VECTOR_MODE_P (vmode)
7309 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7310 != CODE_FOR_nothing))
7312 nloads = lnunits;
7313 lnel = group_size;
7314 ltype = build_nonstandard_integer_type (lsize, 1);
7315 lvectype = build_vector_type (ltype, nloads);
7319 else
7321 nloads = 1;
7322 lnel = const_nunits;
7323 ltype = vectype;
7325 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7327 if (slp)
7329 /* For SLP permutation support we need to load the whole group,
7330 not only the number of vector stmts the permutation result
7331 fits in. */
7332 if (slp_perm)
7334 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7335 variable VF. */
7336 unsigned int const_vf = vf.to_constant ();
7337 ncopies = CEIL (group_size * const_vf, const_nunits);
7338 dr_chain.create (ncopies);
7340 else
7341 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7343 unsigned int group_el = 0;
7344 unsigned HOST_WIDE_INT
7345 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7346 for (j = 0; j < ncopies; j++)
7348 if (nloads > 1)
7349 vec_alloc (v, nloads);
7350 for (i = 0; i < nloads; i++)
7352 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7353 group_el * elsz);
7354 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7355 build2 (MEM_REF, ltype,
7356 running_off, this_off));
7357 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7358 if (nloads > 1)
7359 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7360 gimple_assign_lhs (new_stmt));
7362 group_el += lnel;
7363 if (! slp
7364 || group_el == group_size)
7366 tree newoff = copy_ssa_name (running_off);
7367 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7368 running_off, stride_step);
7369 vect_finish_stmt_generation (stmt, incr, gsi);
7371 running_off = newoff;
7372 group_el = 0;
7375 if (nloads > 1)
7377 tree vec_inv = build_constructor (lvectype, v);
7378 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7379 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7380 if (lvectype != vectype)
7382 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7383 VIEW_CONVERT_EXPR,
7384 build1 (VIEW_CONVERT_EXPR,
7385 vectype, new_temp));
7386 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7390 if (slp)
7392 if (slp_perm)
7393 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7394 else
7395 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7397 else
7399 if (j == 0)
7400 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7401 else
7402 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7403 prev_stmt_info = vinfo_for_stmt (new_stmt);
7406 if (slp_perm)
7408 unsigned n_perms;
7409 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7410 slp_node_instance, false, &n_perms);
7412 return true;
7415 if (grouped_load)
7417 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7418 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7419 /* For SLP vectorization we directly vectorize a subchain
7420 without permutation. */
7421 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7422 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7423 /* For BB vectorization always use the first stmt to base
7424 the data ref pointer on. */
7425 if (bb_vinfo)
7426 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7428 /* Check if the chain of loads is already vectorized. */
7429 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7430 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7431 ??? But we can only do so if there is exactly one
7432 as we have no way to get at the rest. Leave the CSE
7433 opportunity alone.
7434 ??? With the group load eventually participating
7435 in multiple different permutations (having multiple
7436 slp nodes which refer to the same group) the CSE
7437 would even produce wrong code. See PR56270. */
7438 && !slp)
7440 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7441 return true;
7443 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7444 group_gap_adj = 0;
7446 /* VEC_NUM is the number of vect stmts to be created for this group. */
7447 if (slp)
7449 grouped_load = false;
7450 /* For SLP permutation support we need to load the whole group,
7451 not only the number of vector stmts the permutation result
7452 fits in. */
7453 if (slp_perm)
7455 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7456 variable VF. */
7457 unsigned int const_vf = vf.to_constant ();
7458 unsigned int const_nunits = nunits.to_constant ();
7459 vec_num = CEIL (group_size * const_vf, const_nunits);
7460 group_gap_adj = vf * group_size - nunits * vec_num;
7462 else
7464 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7465 group_gap_adj
7466 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7469 else
7470 vec_num = group_size;
7472 ref_type = get_group_alias_ptr_type (first_stmt);
7474 else
7476 first_stmt = stmt;
7477 first_dr = dr;
7478 group_size = vec_num = 1;
7479 group_gap_adj = 0;
7480 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7483 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7484 gcc_assert (alignment_support_scheme);
7485 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7486 /* Targets with store-lane instructions must not require explicit
7487 realignment. vect_supportable_dr_alignment always returns either
7488 dr_aligned or dr_unaligned_supported for masked operations. */
7489 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7490 && !mask
7491 && !masked_loop_p)
7492 || alignment_support_scheme == dr_aligned
7493 || alignment_support_scheme == dr_unaligned_supported);
7495 /* In case the vectorization factor (VF) is bigger than the number
7496 of elements that we can fit in a vectype (nunits), we have to generate
7497 more than one vector stmt - i.e. - we need to "unroll" the
7498 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7499 from one copy of the vector stmt to the next, in the field
7500 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7501 stages to find the correct vector defs to be used when vectorizing
7502 stmts that use the defs of the current stmt. The example below
7503 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7504 need to create 4 vectorized stmts):
7506 before vectorization:
7507 RELATED_STMT VEC_STMT
7508 S1: x = memref - -
7509 S2: z = x + 1 - -
7511 step 1: vectorize stmt S1:
7512 We first create the vector stmt VS1_0, and, as usual, record a
7513 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7514 Next, we create the vector stmt VS1_1, and record a pointer to
7515 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7516 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7517 stmts and pointers:
7518 RELATED_STMT VEC_STMT
7519 VS1_0: vx0 = memref0 VS1_1 -
7520 VS1_1: vx1 = memref1 VS1_2 -
7521 VS1_2: vx2 = memref2 VS1_3 -
7522 VS1_3: vx3 = memref3 - -
7523 S1: x = load - VS1_0
7524 S2: z = x + 1 - -
7526 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7527 information we recorded in the RELATED_STMT field is used to vectorize
7528 stmt S2. */
7530 /* In case of interleaving (non-unit grouped access):
7532 S1: x2 = &base + 2
7533 S2: x0 = &base
7534 S3: x1 = &base + 1
7535 S4: x3 = &base + 3
7537 Vectorized loads are created in the order of memory accesses
7538 starting from the access of the first stmt of the chain:
7540 VS1: vx0 = &base
7541 VS2: vx1 = &base + vec_size*1
7542 VS3: vx3 = &base + vec_size*2
7543 VS4: vx4 = &base + vec_size*3
7545 Then permutation statements are generated:
7547 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7548 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7551 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7552 (the order of the data-refs in the output of vect_permute_load_chain
7553 corresponds to the order of scalar stmts in the interleaving chain - see
7554 the documentation of vect_permute_load_chain()).
7555 The generation of permutation stmts and recording them in
7556 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7558 In case of both multiple types and interleaving, the vector loads and
7559 permutation stmts above are created for every copy. The result vector
7560 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7561 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7563 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7564 on a target that supports unaligned accesses (dr_unaligned_supported)
7565 we generate the following code:
7566 p = initial_addr;
7567 indx = 0;
7568 loop {
7569 p = p + indx * vectype_size;
7570 vec_dest = *(p);
7571 indx = indx + 1;
7574 Otherwise, the data reference is potentially unaligned on a target that
7575 does not support unaligned accesses (dr_explicit_realign_optimized) -
7576 then generate the following code, in which the data in each iteration is
7577 obtained by two vector loads, one from the previous iteration, and one
7578 from the current iteration:
7579 p1 = initial_addr;
7580 msq_init = *(floor(p1))
7581 p2 = initial_addr + VS - 1;
7582 realignment_token = call target_builtin;
7583 indx = 0;
7584 loop {
7585 p2 = p2 + indx * vectype_size
7586 lsq = *(floor(p2))
7587 vec_dest = realign_load (msq, lsq, realignment_token)
7588 indx = indx + 1;
7589 msq = lsq;
7590 } */
7592 /* If the misalignment remains the same throughout the execution of the
7593 loop, we can create the init_addr and permutation mask at the loop
7594 preheader. Otherwise, it needs to be created inside the loop.
7595 This can only occur when vectorizing memory accesses in the inner-loop
7596 nested within an outer-loop that is being vectorized. */
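/* A sketch of such a case (loop and array names are illustrative
   only): if the outer I loop below is the one being vectorized,

     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         ... = a[j * stride + i];

   the step of the access in the inner J loop is stride * sizeof (*a).
   If that step is not known to be a multiple of the vector size, the
   misalignment of the access changes as J advances, so the
   realignment data cannot be set up once in the preheader.  */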
7598 if (nested_in_vect_loop
7599 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7600 GET_MODE_SIZE (TYPE_MODE (vectype))))
7602 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7603 compute_in_loop = true;
7606 if ((alignment_support_scheme == dr_explicit_realign_optimized
7607 || alignment_support_scheme == dr_explicit_realign)
7608 && !compute_in_loop)
7610 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7611 alignment_support_scheme, NULL_TREE,
7612 &at_loop);
7613 if (alignment_support_scheme == dr_explicit_realign_optimized)
7615 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7616 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7617 size_one_node);
7620 else
7621 at_loop = loop;
7623 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7624 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7626 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7627 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7628 else
7629 aggr_type = vectype;
7631 tree vec_mask = NULL_TREE;
7632 tree vec_offset = NULL_TREE;
7633 prev_stmt_info = NULL;
7634 poly_uint64 group_elt = 0;
7635 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
7636 for (j = 0; j < ncopies; j++)
7638 /* 1. Create the vector or array pointer update chain. */
7639 if (j == 0)
7641 bool simd_lane_access_p
7642 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7643 if (simd_lane_access_p
7644 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7645 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7646 && integer_zerop (DR_OFFSET (first_dr))
7647 && integer_zerop (DR_INIT (first_dr))
7648 && alias_sets_conflict_p (get_alias_set (aggr_type),
7649 get_alias_set (TREE_TYPE (ref_type)))
7650 && (alignment_support_scheme == dr_aligned
7651 || alignment_support_scheme == dr_unaligned_supported))
7653 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7654 dataref_offset = build_int_cst (ref_type, 0);
7655 inv_p = false;
7657 else if (first_stmt_for_drptr
7658 && first_stmt != first_stmt_for_drptr)
7660 dataref_ptr
7661 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7662 at_loop, offset, &dummy, gsi,
7663 &ptr_incr, simd_lane_access_p,
7664 &inv_p, byte_offset);
7665 /* Adjust the pointer by the difference to first_stmt. */
7666 data_reference_p ptrdr
7667 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7668 tree diff = fold_convert (sizetype,
7669 size_binop (MINUS_EXPR,
7670 DR_INIT (first_dr),
7671 DR_INIT (ptrdr)));
7672 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7673 stmt, diff);
7675 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7677 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
7678 &dataref_ptr, &vec_offset);
7679 inv_p = false;
7681 else
7682 dataref_ptr
7683 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7684 offset, &dummy, gsi, &ptr_incr,
7685 simd_lane_access_p, &inv_p,
7686 byte_offset);
7687 if (mask)
7688 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7689 mask_vectype);
7691 else
7693 if (dataref_offset)
7694 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7695 TYPE_SIZE_UNIT (aggr_type));
7696 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7698 gimple *def_stmt;
7699 vect_def_type dt;
7700 vect_is_simple_use (vec_offset, loop_vinfo, &def_stmt, &dt);
7701 vec_offset = vect_get_vec_def_for_stmt_copy (dt, vec_offset);
7703 else
7704 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7705 TYPE_SIZE_UNIT (aggr_type));
7706 if (mask)
7708 gimple *def_stmt;
7709 vect_def_type dt;
7710 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7711 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7715 if (grouped_load || slp_perm)
7716 dr_chain.create (vec_num);
7718 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7720 tree vec_array;
7722 vec_array = create_vector_array (vectype, vec_num);
7724 tree final_mask = NULL_TREE;
7725 if (masked_loop_p)
7726 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
7727 if (vec_mask)
7728 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7729 vec_mask, gsi);
7731 gcall *call;
7732 if (final_mask)
7734 /* Emit:
7735 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7736 VEC_MASK). */
7737 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7738 tree alias_ptr = build_int_cst (ref_type, align);
7739 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
7740 dataref_ptr, alias_ptr,
7741 final_mask);
7743 else
7745 /* Emit:
7746 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7747 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7748 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7750 gimple_call_set_lhs (call, vec_array);
7751 gimple_call_set_nothrow (call, true);
7752 new_stmt = call;
7753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7755 /* Extract each vector into an SSA_NAME. */
7756 for (i = 0; i < vec_num; i++)
7758 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7759 vec_array, i);
7760 dr_chain.quick_push (new_temp);
7763 /* Record the mapping between SSA_NAMEs and statements. */
7764 vect_record_grouped_load_vectors (stmt, dr_chain);
7766 else
7768 for (i = 0; i < vec_num; i++)
7770 tree final_mask = NULL_TREE;
7771 if (masked_loop_p
7772 && memory_access_type != VMAT_INVARIANT)
7773 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
7774 vectype, vec_num * j + i);
7775 if (vec_mask)
7776 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7777 vec_mask, gsi);
7779 if (i > 0)
7780 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7781 stmt, NULL_TREE);
7783 /* 2. Create the vector-load in the loop. */
7784 switch (alignment_support_scheme)
7786 case dr_aligned:
7787 case dr_unaligned_supported:
7789 unsigned int align, misalign;
7791 if (memory_access_type == VMAT_GATHER_SCATTER)
7793 tree scale = size_int (gs_info.scale);
7794 gcall *call;
7795 if (masked_loop_p)
7796 call = gimple_build_call_internal
7797 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
7798 vec_offset, scale, final_mask);
7799 else
7800 call = gimple_build_call_internal
7801 (IFN_GATHER_LOAD, 3, dataref_ptr,
7802 vec_offset, scale);
7803 gimple_call_set_nothrow (call, true);
7804 new_stmt = call;
7805 data_ref = NULL_TREE;
7806 break;
7809 align = DR_TARGET_ALIGNMENT (dr);
7810 if (alignment_support_scheme == dr_aligned)
7812 gcc_assert (aligned_access_p (first_dr));
7813 misalign = 0;
7815 else if (DR_MISALIGNMENT (first_dr) == -1)
7817 align = dr_alignment (vect_dr_behavior (first_dr));
7818 misalign = 0;
7820 else
7821 misalign = DR_MISALIGNMENT (first_dr);
7822 if (dataref_offset == NULL_TREE
7823 && TREE_CODE (dataref_ptr) == SSA_NAME)
7824 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7825 align, misalign);
7827 if (final_mask)
7829 align = least_bit_hwi (misalign | align);
7830 tree ptr = build_int_cst (ref_type, align);
7831 gcall *call
7832 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7833 dataref_ptr, ptr,
7834 final_mask);
7835 gimple_call_set_nothrow (call, true);
7836 new_stmt = call;
7837 data_ref = NULL_TREE;
7839 else
7841 data_ref
7842 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7843 dataref_offset
7844 ? dataref_offset
7845 : build_int_cst (ref_type, 0));
7846 if (alignment_support_scheme == dr_aligned)
7848 else if (DR_MISALIGNMENT (first_dr) == -1)
7849 TREE_TYPE (data_ref)
7850 = build_aligned_type (TREE_TYPE (data_ref),
7851 align * BITS_PER_UNIT);
7852 else
7853 TREE_TYPE (data_ref)
7854 = build_aligned_type (TREE_TYPE (data_ref),
7855 TYPE_ALIGN (elem_type));
7857 break;
7859 case dr_explicit_realign:
7861 tree ptr, bump;
7863 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7865 if (compute_in_loop)
7866 msq = vect_setup_realignment (first_stmt, gsi,
7867 &realignment_token,
7868 dr_explicit_realign,
7869 dataref_ptr, NULL);
7871 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7872 ptr = copy_ssa_name (dataref_ptr);
7873 else
7874 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7875 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7876 new_stmt = gimple_build_assign
7877 (ptr, BIT_AND_EXPR, dataref_ptr,
7878 build_int_cst
7879 (TREE_TYPE (dataref_ptr),
7880 -(HOST_WIDE_INT) align));
7881 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7882 data_ref
7883 = build2 (MEM_REF, vectype, ptr,
7884 build_int_cst (ref_type, 0));
7885 vec_dest = vect_create_destination_var (scalar_dest,
7886 vectype);
7887 new_stmt = gimple_build_assign (vec_dest, data_ref);
7888 new_temp = make_ssa_name (vec_dest, new_stmt);
7889 gimple_assign_set_lhs (new_stmt, new_temp);
7890 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7891 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7893 msq = new_temp;
7895 bump = size_binop (MULT_EXPR, vs,
7896 TYPE_SIZE_UNIT (elem_type));
7897 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7898 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7899 new_stmt = gimple_build_assign
7900 (NULL_TREE, BIT_AND_EXPR, ptr,
7901 build_int_cst
7902 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7903 ptr = copy_ssa_name (ptr, new_stmt);
7904 gimple_assign_set_lhs (new_stmt, ptr);
7905 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7906 data_ref
7907 = build2 (MEM_REF, vectype, ptr,
7908 build_int_cst (ref_type, 0));
7909 break;
7911 case dr_explicit_realign_optimized:
7913 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7914 new_temp = copy_ssa_name (dataref_ptr);
7915 else
7916 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7917 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7918 new_stmt = gimple_build_assign
7919 (new_temp, BIT_AND_EXPR, dataref_ptr,
7920 build_int_cst (TREE_TYPE (dataref_ptr),
7921 -(HOST_WIDE_INT) align));
7922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7923 data_ref
7924 = build2 (MEM_REF, vectype, new_temp,
7925 build_int_cst (ref_type, 0));
7926 break;
7928 default:
7929 gcc_unreachable ();
7931 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7932 /* DATA_REF is null if we've already built the statement. */
7933 if (data_ref)
7934 new_stmt = gimple_build_assign (vec_dest, data_ref);
7935 new_temp = make_ssa_name (vec_dest, new_stmt);
7936 gimple_set_lhs (new_stmt, new_temp);
7937 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7939 /* 3. Handle explicit realignment if necessary/supported.
7940 Create in loop:
7941 vec_dest = realign_load (msq, lsq, realignment_token) */
7942 if (alignment_support_scheme == dr_explicit_realign_optimized
7943 || alignment_support_scheme == dr_explicit_realign)
7945 lsq = gimple_assign_lhs (new_stmt);
7946 if (!realignment_token)
7947 realignment_token = dataref_ptr;
7948 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7949 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7950 msq, lsq, realignment_token);
7951 new_temp = make_ssa_name (vec_dest, new_stmt);
7952 gimple_assign_set_lhs (new_stmt, new_temp);
7953 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7955 if (alignment_support_scheme == dr_explicit_realign_optimized)
7957 gcc_assert (phi);
7958 if (i == vec_num - 1 && j == ncopies - 1)
7959 add_phi_arg (phi, lsq,
7960 loop_latch_edge (containing_loop),
7961 UNKNOWN_LOCATION);
7962 msq = lsq;
7966 /* 4. Handle invariant-load. */
7967 if (inv_p && !bb_vinfo)
7969 gcc_assert (!grouped_load);
7970 /* If we have versioned for aliasing or the loop doesn't
7971 have any data dependencies that would preclude this,
7972 then we are sure this is a loop invariant load and
7973 thus we can insert it on the preheader edge. */
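/* An illustrative example (assuming p is known not to alias a):

     for (i = 0; i < n; i++)
       a[i] = *p;

   Here the load of *p is invariant, so the scalar load is emitted
   once on the preheader edge and the vector is then built from that
   single value by vect_init_vector below.  */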
7974 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7975 && !nested_in_vect_loop
7976 && hoist_defs_of_uses (stmt, loop))
7978 if (dump_enabled_p ())
7980 dump_printf_loc (MSG_NOTE, vect_location,
7981 "hoisting out of the vectorized "
7982 "loop: ");
7983 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7985 tree tem = copy_ssa_name (scalar_dest);
7986 gsi_insert_on_edge_immediate
7987 (loop_preheader_edge (loop),
7988 gimple_build_assign (tem,
7989 unshare_expr
7990 (gimple_assign_rhs1 (stmt))));
7991 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7992 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7993 set_vinfo_for_stmt (new_stmt,
7994 new_stmt_vec_info (new_stmt, vinfo));
7996 else
7998 gimple_stmt_iterator gsi2 = *gsi;
7999 gsi_next (&gsi2);
8000 new_temp = vect_init_vector (stmt, scalar_dest,
8001 vectype, &gsi2);
8002 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8006 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8008 tree perm_mask = perm_mask_for_reverse (vectype);
8009 new_temp = permute_vec_elements (new_temp, new_temp,
8010 perm_mask, stmt, gsi);
8011 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8014 /* Collect vector loads and later create their permutation in
8015 vect_transform_grouped_load (). */
8016 if (grouped_load || slp_perm)
8017 dr_chain.quick_push (new_temp);
8019 /* Store vector loads in the corresponding SLP_NODE. */
8020 if (slp && !slp_perm)
8021 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8023 /* With SLP permutation we load the gaps as well; without
8024 it we need to skip the gaps after we manage to fully load
8025 all elements. group_gap_adj is GROUP_SIZE here. */
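/* For example (numbers illustrative only): with a load group of 4
   elements of which the SLP instance uses only the first 2, and 2
   elements per vector, group_gap_adj is 2; after one vector load
   group_elt reaches group_size - group_gap_adj, so the pointer is
   bumped by 2 * element size to step over the unused trailing
   elements of the group.  */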
8026 group_elt += nunits;
8027 if (maybe_ne (group_gap_adj, 0U)
8028 && !slp_perm
8029 && known_eq (group_elt, group_size - group_gap_adj))
8031 poly_wide_int bump_val
8032 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8033 * group_gap_adj);
8034 tree bump = wide_int_to_tree (sizetype, bump_val);
8035 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8036 stmt, bump);
8037 group_elt = 0;
8040 /* Bump the vector pointer to account for a gap or for excess
8041 elements loaded for a permuted SLP load. */
8042 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8044 poly_wide_int bump_val
8045 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8046 * group_gap_adj);
8047 tree bump = wide_int_to_tree (sizetype, bump_val);
8048 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8049 stmt, bump);
8053 if (slp && !slp_perm)
8054 continue;
8056 if (slp_perm)
8058 unsigned n_perms;
8059 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8060 slp_node_instance, false,
8061 &n_perms))
8063 dr_chain.release ();
8064 return false;
8067 else
8069 if (grouped_load)
8071 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8072 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8073 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8075 else
8077 if (j == 0)
8078 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8079 else
8080 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8081 prev_stmt_info = vinfo_for_stmt (new_stmt);
8084 dr_chain.release ();
8087 return true;
8090 /* Function vect_is_simple_cond.
8092 Input:
8093 LOOP - the loop that is being vectorized.
8094 COND - Condition that is checked for simple use.
8096 Output:
8097 *COMP_VECTYPE - the vector type for the comparison.
8098 *DTS - The def types for the arguments of the comparison.
8100 Returns whether a COND can be vectorized. Checks whether
8101 condition operands are supportable using vect_is_simple_use. */
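/* Both accepted forms, as illustrative GIMPLE only:
     _1 = a_2 < b_3;
     x_4 = _1 ? c_5 : d_6;          <-- COND is the mask SSA_NAME _1
     x_4 = a_2 < b_3 ? c_5 : d_6;   <-- COND is the comparison itself  */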
8103 static bool
8104 vect_is_simple_cond (tree cond, vec_info *vinfo,
8105 tree *comp_vectype, enum vect_def_type *dts,
8106 tree vectype)
8108 tree lhs, rhs;
8109 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8111 /* Mask case. */
8112 if (TREE_CODE (cond) == SSA_NAME
8113 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8115 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8116 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8117 &dts[0], comp_vectype)
8118 || !*comp_vectype
8119 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8120 return false;
8121 return true;
8124 if (!COMPARISON_CLASS_P (cond))
8125 return false;
8127 lhs = TREE_OPERAND (cond, 0);
8128 rhs = TREE_OPERAND (cond, 1);
8130 if (TREE_CODE (lhs) == SSA_NAME)
8132 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8133 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8134 return false;
8136 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8137 || TREE_CODE (lhs) == FIXED_CST)
8138 dts[0] = vect_constant_def;
8139 else
8140 return false;
8142 if (TREE_CODE (rhs) == SSA_NAME)
8144 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8145 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8146 return false;
8148 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8149 || TREE_CODE (rhs) == FIXED_CST)
8150 dts[1] = vect_constant_def;
8151 else
8152 return false;
8154 if (vectype1 && vectype2
8155 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8156 TYPE_VECTOR_SUBPARTS (vectype2)))
8157 return false;
8159 *comp_vectype = vectype1 ? vectype1 : vectype2;
8160 /* Invariant comparison. */
8161 if (! *comp_vectype)
8163 tree scalar_type = TREE_TYPE (lhs);
8164 /* If we can widen the comparison to match vectype do so. */
8165 if (INTEGRAL_TYPE_P (scalar_type)
8166 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8167 TYPE_SIZE (TREE_TYPE (vectype))))
8168 scalar_type = build_nonstandard_integer_type
8169 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8170 TYPE_UNSIGNED (scalar_type));
8171 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8174 return true;
8177 /* vectorizable_condition.
8179 Check if STMT is conditional modify expression that can be vectorized.
8180 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8181 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8182 at GSI.
8184 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8185 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8186 else clause if it is 2).
8188 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8190 bool
8191 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8192 gimple **vec_stmt, tree reduc_def, int reduc_index,
8193 slp_tree slp_node)
8195 tree scalar_dest = NULL_TREE;
8196 tree vec_dest = NULL_TREE;
8197 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8198 tree then_clause, else_clause;
8199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8200 tree comp_vectype = NULL_TREE;
8201 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8202 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8203 tree vec_compare;
8204 tree new_temp;
8205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8206 enum vect_def_type dts[4]
8207 = {vect_unknown_def_type, vect_unknown_def_type,
8208 vect_unknown_def_type, vect_unknown_def_type};
8209 int ndts = 4;
8210 int ncopies;
8211 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8212 stmt_vec_info prev_stmt_info = NULL;
8213 int i, j;
8214 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8215 vec<tree> vec_oprnds0 = vNULL;
8216 vec<tree> vec_oprnds1 = vNULL;
8217 vec<tree> vec_oprnds2 = vNULL;
8218 vec<tree> vec_oprnds3 = vNULL;
8219 tree vec_cmp_type;
8220 bool masked = false;
8222 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8223 return false;
8225 vect_reduction_type reduction_type
8226 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8227 if (reduction_type == TREE_CODE_REDUCTION)
8229 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8230 return false;
8232 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8233 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8234 && reduc_def))
8235 return false;
8237 /* FORNOW: not yet supported. */
8238 if (STMT_VINFO_LIVE_P (stmt_info))
8240 if (dump_enabled_p ())
8241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8242 "value used after loop.\n");
8243 return false;
8247 /* Is vectorizable conditional operation? */
8248 if (!is_gimple_assign (stmt))
8249 return false;
8251 code = gimple_assign_rhs_code (stmt);
8253 if (code != COND_EXPR)
8254 return false;
8256 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8257 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8259 if (slp_node)
8260 ncopies = 1;
8261 else
8262 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8264 gcc_assert (ncopies >= 1);
8265 if (reduc_index && ncopies > 1)
8266 return false; /* FORNOW */
8268 cond_expr = gimple_assign_rhs1 (stmt);
8269 then_clause = gimple_assign_rhs2 (stmt);
8270 else_clause = gimple_assign_rhs3 (stmt);
8272 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8273 &comp_vectype, &dts[0], vectype)
8274 || !comp_vectype)
8275 return false;
8277 gimple *def_stmt;
8278 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8279 &vectype1))
8280 return false;
8281 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8282 &vectype2))
8283 return false;
8285 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8286 return false;
8288 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8289 return false;
8291 masked = !COMPARISON_CLASS_P (cond_expr);
8292 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8294 if (vec_cmp_type == NULL_TREE)
8295 return false;
8297 cond_code = TREE_CODE (cond_expr);
8298 if (!masked)
8300 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8301 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8304 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8306 /* Boolean values may have another representation in vectors
8307 and therefore we prefer bit operations over comparison for
8308 them (which also works for scalar masks). We store opcodes
8309 to use in bitop1 and bitop2. Statement is vectorized as
8310 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8311 depending on bitop1 and bitop2 arity. */
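/* Spelling out two of the cases below (for boolean operands only):
   GT_EXPR uses bitop1 == BIT_NOT_EXPR and bitop2 == BIT_AND_EXPR,
   i.e. rhs1 & ~rhs2, while EQ_EXPR uses BIT_XOR_EXPR followed by
   BIT_NOT_EXPR, i.e. ~(rhs1 ^ rhs2).  */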
8312 switch (cond_code)
8314 case GT_EXPR:
8315 bitop1 = BIT_NOT_EXPR;
8316 bitop2 = BIT_AND_EXPR;
8317 break;
8318 case GE_EXPR:
8319 bitop1 = BIT_NOT_EXPR;
8320 bitop2 = BIT_IOR_EXPR;
8321 break;
8322 case LT_EXPR:
8323 bitop1 = BIT_NOT_EXPR;
8324 bitop2 = BIT_AND_EXPR;
8325 std::swap (cond_expr0, cond_expr1);
8326 break;
8327 case LE_EXPR:
8328 bitop1 = BIT_NOT_EXPR;
8329 bitop2 = BIT_IOR_EXPR;
8330 std::swap (cond_expr0, cond_expr1);
8331 break;
8332 case NE_EXPR:
8333 bitop1 = BIT_XOR_EXPR;
8334 break;
8335 case EQ_EXPR:
8336 bitop1 = BIT_XOR_EXPR;
8337 bitop2 = BIT_NOT_EXPR;
8338 break;
8339 default:
8340 return false;
8342 cond_code = SSA_NAME;
8345 if (!vec_stmt)
8347 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8348 if (bitop1 != NOP_EXPR)
8350 machine_mode mode = TYPE_MODE (comp_vectype);
8351 optab optab;
8353 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8354 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8355 return false;
8357 if (bitop2 != NOP_EXPR)
8359 optab = optab_for_tree_code (bitop2, comp_vectype,
8360 optab_default);
8361 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8362 return false;
8365 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8366 cond_code))
8368 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8369 return true;
8371 return false;
8374 /* Transform. */
8376 if (!slp_node)
8378 vec_oprnds0.create (1);
8379 vec_oprnds1.create (1);
8380 vec_oprnds2.create (1);
8381 vec_oprnds3.create (1);
8384 /* Handle def. */
8385 scalar_dest = gimple_assign_lhs (stmt);
8386 if (reduction_type != EXTRACT_LAST_REDUCTION)
8387 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8389 /* Handle cond expr. */
8390 for (j = 0; j < ncopies; j++)
8392 gimple *new_stmt = NULL;
8393 if (j == 0)
8395 if (slp_node)
8397 auto_vec<tree, 4> ops;
8398 auto_vec<vec<tree>, 4> vec_defs;
8400 if (masked)
8401 ops.safe_push (cond_expr);
8402 else
8404 ops.safe_push (cond_expr0);
8405 ops.safe_push (cond_expr1);
8407 ops.safe_push (then_clause);
8408 ops.safe_push (else_clause);
8409 vect_get_slp_defs (ops, slp_node, &vec_defs);
8410 vec_oprnds3 = vec_defs.pop ();
8411 vec_oprnds2 = vec_defs.pop ();
8412 if (!masked)
8413 vec_oprnds1 = vec_defs.pop ();
8414 vec_oprnds0 = vec_defs.pop ();
8416 else
8418 gimple *gtemp;
8419 if (masked)
8421 vec_cond_lhs
8422 = vect_get_vec_def_for_operand (cond_expr, stmt,
8423 comp_vectype);
8424 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8425 &gtemp, &dts[0]);
8427 else
8429 vec_cond_lhs
8430 = vect_get_vec_def_for_operand (cond_expr0,
8431 stmt, comp_vectype);
8432 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8434 vec_cond_rhs
8435 = vect_get_vec_def_for_operand (cond_expr1,
8436 stmt, comp_vectype);
8437 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8439 if (reduc_index == 1)
8440 vec_then_clause = reduc_def;
8441 else
8443 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8444 stmt);
8445 vect_is_simple_use (then_clause, loop_vinfo,
8446 &gtemp, &dts[2]);
8448 if (reduc_index == 2)
8449 vec_else_clause = reduc_def;
8450 else
8452 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8453 stmt);
8454 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8458 else
8460 vec_cond_lhs
8461 = vect_get_vec_def_for_stmt_copy (dts[0],
8462 vec_oprnds0.pop ());
8463 if (!masked)
8464 vec_cond_rhs
8465 = vect_get_vec_def_for_stmt_copy (dts[1],
8466 vec_oprnds1.pop ());
8468 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8469 vec_oprnds2.pop ());
8470 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8471 vec_oprnds3.pop ());
8474 if (!slp_node)
8476 vec_oprnds0.quick_push (vec_cond_lhs);
8477 if (!masked)
8478 vec_oprnds1.quick_push (vec_cond_rhs);
8479 vec_oprnds2.quick_push (vec_then_clause);
8480 vec_oprnds3.quick_push (vec_else_clause);
8483 /* Arguments are ready. Create the new vector stmt. */
8484 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8486 vec_then_clause = vec_oprnds2[i];
8487 vec_else_clause = vec_oprnds3[i];
8489 if (masked)
8490 vec_compare = vec_cond_lhs;
8491 else
8493 vec_cond_rhs = vec_oprnds1[i];
8494 if (bitop1 == NOP_EXPR)
8495 vec_compare = build2 (cond_code, vec_cmp_type,
8496 vec_cond_lhs, vec_cond_rhs);
8497 else
8499 new_temp = make_ssa_name (vec_cmp_type);
8500 if (bitop1 == BIT_NOT_EXPR)
8501 new_stmt = gimple_build_assign (new_temp, bitop1,
8502 vec_cond_rhs);
8503 else
8504 new_stmt
8505 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8506 vec_cond_rhs);
8507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8508 if (bitop2 == NOP_EXPR)
8509 vec_compare = new_temp;
8510 else if (bitop2 == BIT_NOT_EXPR)
8512 /* Instead of doing ~x ? y : z do x ? z : y. */
8513 vec_compare = new_temp;
8514 std::swap (vec_then_clause, vec_else_clause);
8516 else
8518 vec_compare = make_ssa_name (vec_cmp_type);
8519 new_stmt
8520 = gimple_build_assign (vec_compare, bitop2,
8521 vec_cond_lhs, new_temp);
8522 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8526 if (reduction_type == EXTRACT_LAST_REDUCTION)
8528 if (!is_gimple_val (vec_compare))
8530 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8531 new_stmt = gimple_build_assign (vec_compare_name,
8532 vec_compare);
8533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8534 vec_compare = vec_compare_name;
8536 gcc_assert (reduc_index == 2);
8537 new_stmt = gimple_build_call_internal
8538 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8539 vec_then_clause);
8540 gimple_call_set_lhs (new_stmt, scalar_dest);
8541 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8542 if (stmt == gsi_stmt (*gsi))
8543 vect_finish_replace_stmt (stmt, new_stmt);
8544 else
8546 /* In this case we're moving the definition to later in the
8547 block. That doesn't matter because the only uses of the
8548 lhs are in phi statements. */
8549 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8550 gsi_remove (&old_gsi, true);
8551 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8554 else
8556 new_temp = make_ssa_name (vec_dest);
8557 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8558 vec_compare, vec_then_clause,
8559 vec_else_clause);
8560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8562 if (slp_node)
8563 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8566 if (slp_node)
8567 continue;
8569 if (j == 0)
8570 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8571 else
8572 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8574 prev_stmt_info = vinfo_for_stmt (new_stmt);
8577 vec_oprnds0.release ();
8578 vec_oprnds1.release ();
8579 vec_oprnds2.release ();
8580 vec_oprnds3.release ();
8582 return true;
8585 /* vectorizable_comparison.
8587 Check if STMT is comparison expression that can be vectorized.
8588 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8589 comparison, put it in VEC_STMT, and insert it at GSI.
8591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8593 static bool
8594 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8595 gimple **vec_stmt, tree reduc_def,
8596 slp_tree slp_node)
8598 tree lhs, rhs1, rhs2;
8599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8600 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8601 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8602 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8603 tree new_temp;
8604 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8605 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8606 int ndts = 2;
8607 poly_uint64 nunits;
8608 int ncopies;
8609 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8610 stmt_vec_info prev_stmt_info = NULL;
8611 int i, j;
8612 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8613 vec<tree> vec_oprnds0 = vNULL;
8614 vec<tree> vec_oprnds1 = vNULL;
8615 gimple *def_stmt;
8616 tree mask_type;
8617 tree mask;
8619 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8620 return false;
8622 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8623 return false;
8625 mask_type = vectype;
8626 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8628 if (slp_node)
8629 ncopies = 1;
8630 else
8631 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8633 gcc_assert (ncopies >= 1);
8634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8635 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8636 && reduc_def))
8637 return false;
8639 if (STMT_VINFO_LIVE_P (stmt_info))
8641 if (dump_enabled_p ())
8642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8643 "value used after loop.\n");
8644 return false;
8647 if (!is_gimple_assign (stmt))
8648 return false;
8650 code = gimple_assign_rhs_code (stmt);
8652 if (TREE_CODE_CLASS (code) != tcc_comparison)
8653 return false;
8655 rhs1 = gimple_assign_rhs1 (stmt);
8656 rhs2 = gimple_assign_rhs2 (stmt);
8658 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8659 &dts[0], &vectype1))
8660 return false;
8662 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8663 &dts[1], &vectype2))
8664 return false;
8666 if (vectype1 && vectype2
8667 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8668 TYPE_VECTOR_SUBPARTS (vectype2)))
8669 return false;
8671 vectype = vectype1 ? vectype1 : vectype2;
8673 /* Invariant comparison. */
8674 if (!vectype)
8676 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8677 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
8678 return false;
8680 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
8681 return false;
8683 /* Can't compare mask and non-mask types. */
8684 if (vectype1 && vectype2
8685 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8686 return false;
8688 /* Boolean values may have another representation in vectors
8689 and therefore we prefer bit operations over comparison for
8690 them (which also works for scalar masks). We store opcodes
8691 to use in bitop1 and bitop2. Statement is vectorized as
8692 BITOP2 (rhs1 BITOP1 rhs2) or
8693 rhs1 BITOP2 (BITOP1 rhs2)
8694 depending on bitop1 and bitop2 arity. */
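/* As a concrete instance of the scheme above (boolean operands
   only): for LE_EXPR the operands are swapped and bitop1 ==
   BIT_NOT_EXPR, bitop2 == BIT_IOR_EXPR, so rhs1 <= rhs2 is computed
   as rhs2 | ~rhs1 - the usual mask identity for implication.  */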
8695 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8697 if (code == GT_EXPR)
8699 bitop1 = BIT_NOT_EXPR;
8700 bitop2 = BIT_AND_EXPR;
8702 else if (code == GE_EXPR)
8704 bitop1 = BIT_NOT_EXPR;
8705 bitop2 = BIT_IOR_EXPR;
8707 else if (code == LT_EXPR)
8709 bitop1 = BIT_NOT_EXPR;
8710 bitop2 = BIT_AND_EXPR;
8711 std::swap (rhs1, rhs2);
8712 std::swap (dts[0], dts[1]);
8714 else if (code == LE_EXPR)
8716 bitop1 = BIT_NOT_EXPR;
8717 bitop2 = BIT_IOR_EXPR;
8718 std::swap (rhs1, rhs2);
8719 std::swap (dts[0], dts[1]);
8721 else
8723 bitop1 = BIT_XOR_EXPR;
8724 if (code == EQ_EXPR)
8725 bitop2 = BIT_NOT_EXPR;
8729 if (!vec_stmt)
8731 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8732 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8733 dts, ndts, NULL, NULL);
8734 if (bitop1 == NOP_EXPR)
8735 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8736 else
8738 machine_mode mode = TYPE_MODE (vectype);
8739 optab optab;
8741 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8742 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8743 return false;
8745 if (bitop2 != NOP_EXPR)
8747 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8748 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8749 return false;
8751 return true;
8755 /* Transform. */
8756 if (!slp_node)
8758 vec_oprnds0.create (1);
8759 vec_oprnds1.create (1);
8762 /* Handle def. */
8763 lhs = gimple_assign_lhs (stmt);
8764 mask = vect_create_destination_var (lhs, mask_type);
8766 /* Handle cmp expr. */
8767 for (j = 0; j < ncopies; j++)
8769 gassign *new_stmt = NULL;
8770 if (j == 0)
8772 if (slp_node)
8774 auto_vec<tree, 2> ops;
8775 auto_vec<vec<tree>, 2> vec_defs;
8777 ops.safe_push (rhs1);
8778 ops.safe_push (rhs2);
8779 vect_get_slp_defs (ops, slp_node, &vec_defs);
8780 vec_oprnds1 = vec_defs.pop ();
8781 vec_oprnds0 = vec_defs.pop ();
8783 else
8785 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8786 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8789 else
8791 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8792 vec_oprnds0.pop ());
8793 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8794 vec_oprnds1.pop ());
8797 if (!slp_node)
8799 vec_oprnds0.quick_push (vec_rhs1);
8800 vec_oprnds1.quick_push (vec_rhs2);
8803 /* Arguments are ready. Create the new vector stmt. */
8804 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8806 vec_rhs2 = vec_oprnds1[i];
8808 new_temp = make_ssa_name (mask);
8809 if (bitop1 == NOP_EXPR)
8811 new_stmt = gimple_build_assign (new_temp, code,
8812 vec_rhs1, vec_rhs2);
8813 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8815 else
8817 if (bitop1 == BIT_NOT_EXPR)
8818 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8819 else
8820 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8821 vec_rhs2);
8822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8823 if (bitop2 != NOP_EXPR)
8825 tree res = make_ssa_name (mask);
8826 if (bitop2 == BIT_NOT_EXPR)
8827 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8828 else
8829 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8830 new_temp);
8831 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8834 if (slp_node)
8835 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8838 if (slp_node)
8839 continue;
8841 if (j == 0)
8842 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8843 else
8844 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8846 prev_stmt_info = vinfo_for_stmt (new_stmt);
8849 vec_oprnds0.release ();
8850 vec_oprnds1.release ();
8852 return true;
8855 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8856 can handle all live statements in the node. Otherwise return true
8857 if STMT is not live or if vectorizable_live_operation can handle it.
8858 GSI and VEC_STMT are as for vectorizable_live_operation. */
8860 static bool
8861 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8862 slp_tree slp_node, gimple **vec_stmt)
8864 if (slp_node)
8866 gimple *slp_stmt;
8867 unsigned int i;
8868 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8870 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8871 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8872 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8873 vec_stmt))
8874 return false;
8877 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8878 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8879 return false;
8881 return true;
8884 /* Make sure the statement is vectorizable. */
8886 bool
8887 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8888 slp_instance node_instance)
8890 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8891 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8892 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8893 bool ok;
8894 gimple *pattern_stmt;
8895 gimple_seq pattern_def_seq;
8897 if (dump_enabled_p ())
8899 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8900 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8903 if (gimple_has_volatile_ops (stmt))
8905 if (dump_enabled_p ())
8906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8907 "not vectorized: stmt has volatile operands\n");
8909 return false;
8912 /* Skip stmts that do not need to be vectorized. In loops this is expected
8913 to include:
8914 - the COND_EXPR which is the loop exit condition
8915 - any LABEL_EXPRs in the loop
8916 - computations that are used only for array indexing or loop control.
8917 In basic blocks we only analyze statements that are a part of some SLP
8918 instance, therefore, all the statements are relevant.
8920 Pattern statement needs to be analyzed instead of the original statement
8921 if the original statement is not relevant. Otherwise, we analyze both
8922 statements. In basic blocks we are called from some SLP instance
8923 traversal; in that case don't analyze the pattern stmts separately,
8924 since they will already be part of an SLP instance. */
8926 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8927 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8928 && !STMT_VINFO_LIVE_P (stmt_info))
8930 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8931 && pattern_stmt
8932 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8933 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8935 /* Analyze PATTERN_STMT instead of the original stmt. */
8936 stmt = pattern_stmt;
8937 stmt_info = vinfo_for_stmt (pattern_stmt);
8938 if (dump_enabled_p ())
8940 dump_printf_loc (MSG_NOTE, vect_location,
8941 "==> examining pattern statement: ");
8942 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8945 else
8947 if (dump_enabled_p ())
8948 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8950 return true;
8953 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8954 && node == NULL
8955 && pattern_stmt
8956 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8957 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8959 /* Analyze PATTERN_STMT too. */
8960 if (dump_enabled_p ())
8962 dump_printf_loc (MSG_NOTE, vect_location,
8963 "==> examining pattern statement: ");
8964 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8967 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8968 node_instance))
8969 return false;
8972 if (is_pattern_stmt_p (stmt_info)
8973 && node == NULL
8974 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8976 gimple_stmt_iterator si;
8978 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8980 gimple *pattern_def_stmt = gsi_stmt (si);
8981 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8982 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8984 /* Analyze def stmt of STMT if it's a pattern stmt. */
8985 if (dump_enabled_p ())
8987 dump_printf_loc (MSG_NOTE, vect_location,
8988 "==> examining pattern def statement: ");
8989 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8992 if (!vect_analyze_stmt (pattern_def_stmt,
8993 need_to_vectorize, node, node_instance))
8994 return false;
8999 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9001 case vect_internal_def:
9002 break;
9004 case vect_reduction_def:
9005 case vect_nested_cycle:
9006 gcc_assert (!bb_vinfo
9007 && (relevance == vect_used_in_outer
9008 || relevance == vect_used_in_outer_by_reduction
9009 || relevance == vect_used_by_reduction
9010 || relevance == vect_unused_in_scope
9011 || relevance == vect_used_only_live));
9012 break;
9014 case vect_induction_def:
9015 gcc_assert (!bb_vinfo);
9016 break;
9018 case vect_constant_def:
9019 case vect_external_def:
9020 case vect_unknown_def_type:
9021 default:
9022 gcc_unreachable ();
9025 if (STMT_VINFO_RELEVANT_P (stmt_info))
9027 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9028 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9029 || (is_gimple_call (stmt)
9030 && gimple_call_lhs (stmt) == NULL_TREE));
9031 *need_to_vectorize = true;
9034 if (PURE_SLP_STMT (stmt_info) && !node)
9036 dump_printf_loc (MSG_NOTE, vect_location,
9037 "handled only by SLP analysis\n");
9038 return true;
9041 ok = true;
9042 if (!bb_vinfo
9043 && (STMT_VINFO_RELEVANT_P (stmt_info)
9044 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9045 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9046 || vectorizable_conversion (stmt, NULL, NULL, node)
9047 || vectorizable_shift (stmt, NULL, NULL, node)
9048 || vectorizable_operation (stmt, NULL, NULL, node)
9049 || vectorizable_assignment (stmt, NULL, NULL, node)
9050 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9051 || vectorizable_call (stmt, NULL, NULL, node)
9052 || vectorizable_store (stmt, NULL, NULL, node)
9053 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9054 || vectorizable_induction (stmt, NULL, NULL, node)
9055 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9056 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9057 else
9059 if (bb_vinfo)
9060 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9061 || vectorizable_conversion (stmt, NULL, NULL, node)
9062 || vectorizable_shift (stmt, NULL, NULL, node)
9063 || vectorizable_operation (stmt, NULL, NULL, node)
9064 || vectorizable_assignment (stmt, NULL, NULL, node)
9065 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9066 || vectorizable_call (stmt, NULL, NULL, node)
9067 || vectorizable_store (stmt, NULL, NULL, node)
9068 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9069 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9072 if (!ok)
9074 if (dump_enabled_p ())
9076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9077 "not vectorized: relevant stmt not ");
9078 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9079 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9082 return false;
9085 if (bb_vinfo)
9086 return true;
9088 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9089 need extra handling, except for vectorizable reductions. */
9090 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9091 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9093 if (dump_enabled_p ())
9095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9096 "not vectorized: live stmt not supported: ");
9097 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9100 return false;
9103 return true;
9107 /* Function vect_transform_stmt.
9109 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9111 bool
9112 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9113 bool *grouped_store, slp_tree slp_node,
9114 slp_instance slp_node_instance)
9116 bool is_store = false;
9117 gimple *vec_stmt = NULL;
9118 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9119 bool done;
9121 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9122 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9124 switch (STMT_VINFO_TYPE (stmt_info))
9126 case type_demotion_vec_info_type:
9127 case type_promotion_vec_info_type:
9128 case type_conversion_vec_info_type:
9129 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9130 gcc_assert (done);
9131 break;
9133 case induc_vec_info_type:
9134 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9135 gcc_assert (done);
9136 break;
9138 case shift_vec_info_type:
9139 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9140 gcc_assert (done);
9141 break;
9143 case op_vec_info_type:
9144 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9145 gcc_assert (done);
9146 break;
9148 case assignment_vec_info_type:
9149 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9150 gcc_assert (done);
9151 break;
9153 case load_vec_info_type:
9154 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9155 slp_node_instance);
9156 gcc_assert (done);
9157 break;
9159 case store_vec_info_type:
9160 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9161 gcc_assert (done);
9162 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9164 /* In case of interleaving, the whole chain is vectorized when the
9165 last store in the chain is reached. Store stmts before the last
9166 one are skipped, and their stmt_vec_info shouldn't be freed
9167 meanwhile. */
9168 *grouped_store = true;
9169 if (STMT_VINFO_VEC_STMT (stmt_info))
9170 is_store = true;
9172 else
9173 is_store = true;
9174 break;
9176 case condition_vec_info_type:
9177 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9178 gcc_assert (done);
9179 break;
9181 case comparison_vec_info_type:
9182 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9183 gcc_assert (done);
9184 break;
9186 case call_vec_info_type:
9187 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9188 stmt = gsi_stmt (*gsi);
9189 break;
9191 case call_simd_clone_vec_info_type:
9192 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9193 stmt = gsi_stmt (*gsi);
9194 break;
9196 case reduc_vec_info_type:
9197 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9198 slp_node_instance);
9199 gcc_assert (done);
9200 break;
9202 default:
9203 if (!STMT_VINFO_LIVE_P (stmt_info))
9205 if (dump_enabled_p ())
9206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9207 "stmt not supported.\n");
9208 gcc_unreachable ();
9212 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9213 This would break hybrid SLP vectorization. */
9214 if (slp_node)
9215 gcc_assert (!vec_stmt
9216 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9218 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9219 is being vectorized, but outside the immediately enclosing loop. */
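/* A sketch of the situation handled here (only the shape of the
   loop nest matters):

     outer loop (being vectorized):
       inner loop:
         x_1 = ...;              <-- STMT
       x_2 = PHI <x_1>           <-- inner-loop exit phi
       ... = x_2;                <-- use in the outer loop body

   The vectorized def of x_1 is recorded on that exit phi so that
   the outer-loop uses can find it.  */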
9220 if (vec_stmt
9221 && STMT_VINFO_LOOP_VINFO (stmt_info)
9222 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
9223 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
9224 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9225 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9226 || STMT_VINFO_RELEVANT (stmt_info) ==
9227 vect_used_in_outer_by_reduction))
9229 struct loop *innerloop = LOOP_VINFO_LOOP (
9230 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9231 imm_use_iterator imm_iter;
9232 use_operand_p use_p;
9233 tree scalar_dest;
9234 gimple *exit_phi;
9236 if (dump_enabled_p ())
9237 dump_printf_loc (MSG_NOTE, vect_location,
9238 "Record the vdef for outer-loop vectorization.\n");
9240 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9241 (to be used when vectorizing outer-loop stmts that use the DEF of
9242 STMT). */
9243 if (gimple_code (stmt) == GIMPLE_PHI)
9244 scalar_dest = PHI_RESULT (stmt);
9245 else
9246 scalar_dest = gimple_assign_lhs (stmt);
9248 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9250 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9252 exit_phi = USE_STMT (use_p);
9253 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9258 /* Handle stmts whose DEF is used outside the loop-nest that is
9259 being vectorized. */
9260 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9262 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9263 gcc_assert (done);
9266 if (vec_stmt)
9267 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9269 return is_store;
9273 /* Remove a group of stores (for SLP or interleaving), free their
9274 stmt_vec_info. */
9276 void
9277 vect_remove_stores (gimple *first_stmt)
9279 gimple *next = first_stmt;
9280 gimple *tmp;
9281 gimple_stmt_iterator next_si;
9283 while (next)
9285 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9287 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9288 if (is_pattern_stmt_p (stmt_info))
9289 next = STMT_VINFO_RELATED_STMT (stmt_info);
9290 /* Free the attached stmt_vec_info and remove the stmt. */
9291 next_si = gsi_for_stmt (next);
9292 unlink_stmt_vdef (next);
9293 gsi_remove (&next_si, true);
9294 release_defs (next);
9295 free_stmt_vec_info (next);
9296 next = tmp;
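/* Illustrative usage sketch, not part of the interface itself: callers
   normally hand this function the first statement of the group, e.g.

     vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));

   where STMT_INFO here stands for the stmt_vec_info of any store in the
   interleaving chain.  */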
9301 /* Function new_stmt_vec_info.
9303 Create and initialize a new stmt_vec_info struct for STMT. */
9305 stmt_vec_info
9306 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9308 stmt_vec_info res;
9309 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9311 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9312 STMT_VINFO_STMT (res) = stmt;
9313 res->vinfo = vinfo;
9314 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9315 STMT_VINFO_LIVE_P (res) = false;
9316 STMT_VINFO_VECTYPE (res) = NULL;
9317 STMT_VINFO_VEC_STMT (res) = NULL;
9318 STMT_VINFO_VECTORIZABLE (res) = true;
9319 STMT_VINFO_IN_PATTERN_P (res) = false;
9320 STMT_VINFO_RELATED_STMT (res) = NULL;
9321 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9322 STMT_VINFO_DATA_REF (res) = NULL;
9323 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9324 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9326 if (gimple_code (stmt) == GIMPLE_PHI
9327 && is_loop_header_bb_p (gimple_bb (stmt)))
9328 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9329 else
9330 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9332 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9333 STMT_SLP_TYPE (res) = loop_vect;
9334 STMT_VINFO_NUM_SLP_USES (res) = 0;
9336 GROUP_FIRST_ELEMENT (res) = NULL;
9337 GROUP_NEXT_ELEMENT (res) = NULL;
9338 GROUP_SIZE (res) = 0;
9339 GROUP_STORE_COUNT (res) = 0;
9340 GROUP_GAP (res) = 0;
9341 GROUP_SAME_DR_STMT (res) = NULL;
9343 return res;
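/* Usage sketch (caller code, shown here only for illustration): the
   allocation above is normally paired with set_vinfo_for_stmt so that
   vinfo_for_stmt can later retrieve the info, e.g.

     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, vinfo));

   with STMT and VINFO standing for whatever statement and vec_info the
   caller is processing.  */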
9347 /* Create the vector used to hold stmt_vec_info structs. */
9349 void
9350 init_stmt_vec_info_vec (void)
9352 gcc_assert (!stmt_vec_info_vec.exists ());
9353 stmt_vec_info_vec.create (50);
9357 /* Free the vector used to hold stmt_vec_info structs. */
9359 void
9360 free_stmt_vec_info_vec (void)
9362 unsigned int i;
9363 stmt_vec_info info;
9364 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9365 if (info != NULL)
9366 free_stmt_vec_info (STMT_VINFO_STMT (info));
9367 gcc_assert (stmt_vec_info_vec.exists ());
9368 stmt_vec_info_vec.release ();
9372 /* Free stmt vectorization related info. */
9374 void
9375 free_stmt_vec_info (gimple *stmt)
9377 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9379 if (!stmt_info)
9380 return;
9382 /* Check if this statement has a related "pattern stmt"
9383 (introduced by the vectorizer during the pattern recognition
9384 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9385 too. */
9386 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9388 stmt_vec_info patt_info
9389 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9390 if (patt_info)
9392 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9393 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9394 gimple_set_bb (patt_stmt, NULL);
9395 tree lhs = gimple_get_lhs (patt_stmt);
9396 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9397 release_ssa_name (lhs);
9398 if (seq)
9400 gimple_stmt_iterator si;
9401 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9403 gimple *seq_stmt = gsi_stmt (si);
9404 gimple_set_bb (seq_stmt, NULL);
9405 lhs = gimple_get_lhs (seq_stmt);
9406 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9407 release_ssa_name (lhs);
9408 free_stmt_vec_info (seq_stmt);
9411 free_stmt_vec_info (patt_stmt);
9415 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9416 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9417 set_vinfo_for_stmt (stmt, NULL);
9418 free (stmt_info);
9422 /* Function get_vectype_for_scalar_type_and_size.
9424 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9425 by the target. */
9427 tree
9428 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9430 tree orig_scalar_type = scalar_type;
9431 scalar_mode inner_mode;
9432 machine_mode simd_mode;
9433 poly_uint64 nunits;
9434 tree vectype;
9436 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9437 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9438 return NULL_TREE;
9440 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9442 /* For vector types of elements whose mode precision doesn't
9443 match their type's precision, we use an element type of mode
9444 precision. The vectorization routines will have to make sure
9445 they support the proper result truncation/extension.
9446 We also make sure to build vector types with INTEGER_TYPE
9447 component type only. */
9448 if (INTEGRAL_TYPE_P (scalar_type)
9449 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9450 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9451 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9452 TYPE_UNSIGNED (scalar_type));
9454 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9455 When the component mode passes the above test simply use a type
9456 corresponding to that mode. The theory is that any use that
9457 would cause problems with this will disable vectorization anyway. */
9458 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9459 && !INTEGRAL_TYPE_P (scalar_type))
9460 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9462 /* We can't build a vector type of elements with alignment bigger than
9463 their size. */
9464 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9465 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9466 TYPE_UNSIGNED (scalar_type));
9468 /* If we fell back to using the mode, fail if there was
9469 no scalar type for it. */
9470 if (scalar_type == NULL_TREE)
9471 return NULL_TREE;
9473 /* If no size was supplied, use the mode the target prefers. Otherwise
9474 look up a vector mode of the specified size. */
9475 if (known_eq (size, 0U))
9476 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9477 else if (!multiple_p (size, nbytes, &nunits)
9478 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9479 return NULL_TREE;
9480 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9481 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9482 return NULL_TREE;
9484 vectype = build_vector_type (scalar_type, nunits);
9486 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9487 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9488 return NULL_TREE;
9490 /* Re-attach the address-space qualifier if we canonicalized the scalar
9491 type. */
9492 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9493 return build_qualified_type
9494 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9496 return vectype;
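/* Rough example of the behaviour documented above (a sketch only,
   assuming a target that provides 16-byte vectors; the exact modes are
   target-specific):

     tree v4si = get_vectype_for_scalar_type_and_size (integer_type_node, 16);
     tree pref = get_vectype_for_scalar_type_and_size (integer_type_node, 0);

   The first call asks for a 16-byte vector of ints (V4SI on most such
   targets), while SIZE == 0 defers to the target's preferred SIMD mode
   via targetm.vectorize.preferred_simd_mode.  */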
9499 poly_uint64 current_vector_size;
9501 /* Function get_vectype_for_scalar_type.
9503 Returns the vector type corresponding to SCALAR_TYPE as supported
9504 by the target. */
9506 tree
9507 get_vectype_for_scalar_type (tree scalar_type)
9509 tree vectype;
9510 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9511 current_vector_size);
9512 if (vectype
9513 && known_eq (current_vector_size, 0U))
9514 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9515 return vectype;
9518 /* Function get_mask_type_for_scalar_type.
9520 Returns the mask type corresponding to the result of a comparison
9521 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9523 tree
9524 get_mask_type_for_scalar_type (tree scalar_type)
9526 tree vectype = get_vectype_for_scalar_type (scalar_type);
9528 if (!vectype)
9529 return NULL;
9531 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9532 current_vector_size);
9535 /* Function get_same_sized_vectype
9537 Returns a vector type corresponding to SCALAR_TYPE with the same
9538 size as VECTOR_TYPE, if supported by the target. */
9540 tree
9541 get_same_sized_vectype (tree scalar_type, tree vector_type)
9543 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9544 return build_same_sized_truth_vector_type (vector_type);
9546 return get_vectype_for_scalar_type_and_size
9547 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9550 /* Function vect_is_simple_use.
9552 Input:
9553 VINFO - the vect info of the loop or basic block that is being vectorized.
9554 OPERAND - operand in the loop or bb.
9555 Output:
9556 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9557 DT - the type of definition
9559 Returns whether a stmt with OPERAND can be vectorized.
9560 For loops, supportable operands are constants, loop invariants, and operands
9561 that are defined by the current iteration of the loop. Unsupportable
9562 operands are those that are defined by a previous iteration of the loop (as
9563 is the case in reduction/induction computations).
9564 For basic blocks, supportable operands are constants and bb invariants.
9565 For now, operands defined outside the basic block are not supported. */
9567 bool
9568 vect_is_simple_use (tree operand, vec_info *vinfo,
9569 gimple **def_stmt, enum vect_def_type *dt)
9571 *def_stmt = NULL;
9572 *dt = vect_unknown_def_type;
9574 if (dump_enabled_p ())
9576 dump_printf_loc (MSG_NOTE, vect_location,
9577 "vect_is_simple_use: operand ");
9578 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9579 dump_printf (MSG_NOTE, "\n");
9582 if (CONSTANT_CLASS_P (operand))
9584 *dt = vect_constant_def;
9585 return true;
9588 if (is_gimple_min_invariant (operand))
9590 *dt = vect_external_def;
9591 return true;
9594 if (TREE_CODE (operand) != SSA_NAME)
9596 if (dump_enabled_p ())
9597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9598 "not ssa-name.\n");
9599 return false;
9602 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9604 *dt = vect_external_def;
9605 return true;
9608 *def_stmt = SSA_NAME_DEF_STMT (operand);
9609 if (dump_enabled_p ())
9611 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9615 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9616 *dt = vect_external_def;
9617 else
9619 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9620 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9623 if (dump_enabled_p ())
9625 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9626 switch (*dt)
9628 case vect_uninitialized_def:
9629 dump_printf (MSG_NOTE, "uninitialized\n");
9630 break;
9631 case vect_constant_def:
9632 dump_printf (MSG_NOTE, "constant\n");
9633 break;
9634 case vect_external_def:
9635 dump_printf (MSG_NOTE, "external\n");
9636 break;
9637 case vect_internal_def:
9638 dump_printf (MSG_NOTE, "internal\n");
9639 break;
9640 case vect_induction_def:
9641 dump_printf (MSG_NOTE, "induction\n");
9642 break;
9643 case vect_reduction_def:
9644 dump_printf (MSG_NOTE, "reduction\n");
9645 break;
9646 case vect_double_reduction_def:
9647 dump_printf (MSG_NOTE, "double reduction\n");
9648 break;
9649 case vect_nested_cycle:
9650 dump_printf (MSG_NOTE, "nested cycle\n");
9651 break;
9652 case vect_unknown_def_type:
9653 dump_printf (MSG_NOTE, "unknown\n");
9654 break;
9658 if (*dt == vect_unknown_def_type)
9660 if (dump_enabled_p ())
9661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9662 "Unsupported pattern.\n");
9663 return false;
9666 switch (gimple_code (*def_stmt))
9668 case GIMPLE_PHI:
9669 case GIMPLE_ASSIGN:
9670 case GIMPLE_CALL:
9671 break;
9672 default:
9673 if (dump_enabled_p ())
9674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9675 "unsupported defining stmt:\n");
9676 return false;
9679 return true;
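/* Typical call pattern (hypothetical caller code, for illustration):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
       return false;

   i.e. an operand that is not "simple" in the sense above makes the
   enclosing statement unvectorizable, with OP and VINFO standing for
   the operand and vec_info being analyzed.  */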
9682 /* Function vect_is_simple_use.
9684 Same as vect_is_simple_use but also determines the vector operand
9685 type of OPERAND and stores it to *VECTYPE. If the definition of
9686 OPERAND is vect_uninitialized_def, vect_constant_def or
9687 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9688 is responsible for computing the best-suited vector type for the
9689 scalar operand. */
9691 bool
9692 vect_is_simple_use (tree operand, vec_info *vinfo,
9693 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9695 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9696 return false;
9698 /* Now get a vector type if the def is internal, otherwise supply
9699 NULL_TREE and leave it up to the caller to figure out a proper
9700 type for the use stmt. */
9701 if (*dt == vect_internal_def
9702 || *dt == vect_induction_def
9703 || *dt == vect_reduction_def
9704 || *dt == vect_double_reduction_def
9705 || *dt == vect_nested_cycle)
9707 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9709 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9710 && !STMT_VINFO_RELEVANT (stmt_info)
9711 && !STMT_VINFO_LIVE_P (stmt_info))
9712 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9714 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9715 gcc_assert (*vectype != NULL_TREE);
9717 else if (*dt == vect_uninitialized_def
9718 || *dt == vect_constant_def
9719 || *dt == vect_external_def)
9720 *vectype = NULL_TREE;
9721 else
9722 gcc_unreachable ();
9724 return true;
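/* Sketch of how the VECTYPE-returning overload is commonly used
   (caller code is hypothetical):

     tree vectype;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   i.e. for constant/external defs the caller picks a suitable vector
   type itself, as described above.  */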
9728 /* Function supportable_widening_operation
9730 Check whether an operation represented by the code CODE is a
9731 widening operation that is supported by the target platform in
9732 vector form (i.e., when operating on arguments of type VECTYPE_IN
9733 producing a result of type VECTYPE_OUT).
9735 Widening operations we currently support are NOP (CONVERT), FLOAT,
9736 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if these operations are supported
9737 by the target platform either directly (via vector tree-codes), or via
9738 target builtins.
9740 Output:
9741 - CODE1 and CODE2 are codes of vector operations to be used when
9742 vectorizing the operation, if available.
9743 - MULTI_STEP_CVT determines the number of required intermediate steps in
9744 case of multi-step conversion (like char->short->int - in that case
9745 MULTI_STEP_CVT will be 1).
9746 - INTERM_TYPES contains the intermediate type required to perform the
9747 widening operation (short in the above example). */
9749 bool
9750 supportable_widening_operation (enum tree_code code, gimple *stmt,
9751 tree vectype_out, tree vectype_in,
9752 enum tree_code *code1, enum tree_code *code2,
9753 int *multi_step_cvt,
9754 vec<tree> *interm_types)
9756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9757 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9758 struct loop *vect_loop = NULL;
9759 machine_mode vec_mode;
9760 enum insn_code icode1, icode2;
9761 optab optab1, optab2;
9762 tree vectype = vectype_in;
9763 tree wide_vectype = vectype_out;
9764 enum tree_code c1, c2;
9765 int i;
9766 tree prev_type, intermediate_type;
9767 machine_mode intermediate_mode, prev_mode;
9768 optab optab3, optab4;
9770 *multi_step_cvt = 0;
9771 if (loop_info)
9772 vect_loop = LOOP_VINFO_LOOP (loop_info);
9774 switch (code)
9776 case WIDEN_MULT_EXPR:
9777 /* The result of a vectorized widening operation usually requires
9778 two vectors (because the widened results do not fit into one vector).
9779 The generated vector results would normally be expected to be
9780 generated in the same order as in the original scalar computation,
9781 i.e. if 8 results are generated in each vector iteration, they are
9782 to be organized as follows:
9783 vect1: [res1,res2,res3,res4],
9784 vect2: [res5,res6,res7,res8].
9786 However, in the special case that the result of the widening
9787 operation is used in a reduction computation only, the order doesn't
9788 matter (because when vectorizing a reduction we change the order of
9789 the computation). Some targets can take advantage of this and
9790 generate more efficient code. For example, targets like Altivec,
9791 that support widen_mult using a sequence of {mult_even,mult_odd}
9792 generate the following vectors:
9793 vect1: [res1,res3,res5,res7],
9794 vect2: [res2,res4,res6,res8].
9796 When vectorizing outer-loops, we execute the inner-loop sequentially
9797 (each vectorized inner-loop iteration contributes to VF outer-loop
9798 iterations in parallel). We therefore don't allow changing the
9799 order of the computation in the inner-loop during outer-loop
9800 vectorization. */
9801 /* TODO: Another case in which order doesn't *really* matter is when we
9802 widen and then contract again, e.g. (short)((int)x * y >> 8).
9803 Normally, pack_trunc performs an even/odd permute, whereas the
9804 repack from an even/odd expansion would be an interleave, which
9805 would be significantly simpler for e.g. AVX2. */
9806 /* In any case, in order to avoid duplicating the code below, recurse
9807 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9808 are properly set up for the caller. If we fail, we'll continue with
9809 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9810 if (vect_loop
9811 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9812 && !nested_in_vect_loop_p (vect_loop, stmt)
9813 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9814 stmt, vectype_out, vectype_in,
9815 code1, code2, multi_step_cvt,
9816 interm_types))
9818 /* Elements in a vector with vect_used_by_reduction property cannot
9819 be reordered if the use chain with this property does not have the
9820 same operation. One such example is s += a * b, where elements
9821 in a and b cannot be reordered. Here we check if the vector defined
9822 by STMT is only directly used in the reduction statement. */
9823 tree lhs = gimple_assign_lhs (stmt);
9824 use_operand_p dummy;
9825 gimple *use_stmt;
9826 stmt_vec_info use_stmt_info = NULL;
9827 if (single_imm_use (lhs, &dummy, &use_stmt)
9828 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9829 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9830 return true;
9832 c1 = VEC_WIDEN_MULT_LO_EXPR;
9833 c2 = VEC_WIDEN_MULT_HI_EXPR;
9834 break;
9836 case DOT_PROD_EXPR:
9837 c1 = DOT_PROD_EXPR;
9838 c2 = DOT_PROD_EXPR;
9839 break;
9841 case SAD_EXPR:
9842 c1 = SAD_EXPR;
9843 c2 = SAD_EXPR;
9844 break;
9846 case VEC_WIDEN_MULT_EVEN_EXPR:
9847 /* Support the recursion induced just above. */
9848 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9849 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9850 break;
9852 case WIDEN_LSHIFT_EXPR:
9853 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9854 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9855 break;
9857 CASE_CONVERT:
9858 c1 = VEC_UNPACK_LO_EXPR;
9859 c2 = VEC_UNPACK_HI_EXPR;
9860 break;
9862 case FLOAT_EXPR:
9863 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9864 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9865 break;
9867 case FIX_TRUNC_EXPR:
9868 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9869 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9870 computing the operation. */
9871 return false;
9873 default:
9874 gcc_unreachable ();
9877 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9878 std::swap (c1, c2);
9880 if (code == FIX_TRUNC_EXPR)
9882 /* The signedness is determined from the output operand. */
9883 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9884 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9886 else
9888 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9889 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9892 if (!optab1 || !optab2)
9893 return false;
9895 vec_mode = TYPE_MODE (vectype);
9896 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9897 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9898 return false;
9900 *code1 = c1;
9901 *code2 = c2;
9903 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9904 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9905 /* For scalar masks we may have different boolean
9906 vector types with the same QImode. Thus we
9907 also check the number of elements. */
9908 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9909 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9910 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9912 /* Check if it's a multi-step conversion that can be done using intermediate
9913 types. */
9915 prev_type = vectype;
9916 prev_mode = vec_mode;
9918 if (!CONVERT_EXPR_CODE_P (code))
9919 return false;
9921 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9922 intermediate steps in the promotion sequence. We try up to
9923 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9924 not. */
9925 interm_types->create (MAX_INTERM_CVT_STEPS);
9926 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9928 intermediate_mode = insn_data[icode1].operand[0].mode;
9929 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9931 intermediate_type = vect_halve_mask_nunits (prev_type);
9932 if (intermediate_mode != TYPE_MODE (intermediate_type))
9933 return false;
9935 else
9936 intermediate_type
9937 = lang_hooks.types.type_for_mode (intermediate_mode,
9938 TYPE_UNSIGNED (prev_type));
9940 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9941 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9943 if (!optab3 || !optab4
9944 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9945 || insn_data[icode1].operand[0].mode != intermediate_mode
9946 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9947 || insn_data[icode2].operand[0].mode != intermediate_mode
9948 || ((icode1 = optab_handler (optab3, intermediate_mode))
9949 == CODE_FOR_nothing)
9950 || ((icode2 = optab_handler (optab4, intermediate_mode))
9951 == CODE_FOR_nothing))
9952 break;
9954 interm_types->quick_push (intermediate_type);
9955 (*multi_step_cvt)++;
9957 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9958 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9959 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9960 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9961 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9963 prev_type = intermediate_type;
9964 prev_mode = intermediate_mode;
9967 interm_types->release ();
9968 return false;
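/* Worked example restating the char->short->int case from the comment
   above (a sketch of the outputs, not additional functionality): for a
   CONVERT from a vector of chars to a vector of ints this returns true
   with
     *code1 = VEC_UNPACK_LO_EXPR, *code2 = VEC_UNPACK_HI_EXPR
       (swapped on big-endian targets),
     *multi_step_cvt = 1,
   and the intermediate short vector type pushed onto INTERM_TYPES,
   provided the target implements the unpack optabs at each step.  */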
9972 /* Function supportable_narrowing_operation
9974 Check whether an operation represented by the code CODE is a
9975 narrowing operation that is supported by the target platform in
9976 vector form (i.e., when operating on arguments of type VECTYPE_IN
9977 and producing a result of type VECTYPE_OUT).
9979 Narrowing operations we currently support are NOP (CONVERT) and
9980 FIX_TRUNC. This function checks if these operations are supported by
9981 the target platform directly via vector tree-codes.
9983 Output:
9984 - CODE1 is the code of a vector operation to be used when
9985 vectorizing the operation, if available.
9986 - MULTI_STEP_CVT determines the number of required intermediate steps in
9987 case of multi-step conversion (like int->short->char - in that case
9988 MULTI_STEP_CVT will be 1).
9989 - INTERM_TYPES contains the intermediate type required to perform the
9990 narrowing operation (short in the above example). */
9992 bool
9993 supportable_narrowing_operation (enum tree_code code,
9994 tree vectype_out, tree vectype_in,
9995 enum tree_code *code1, int *multi_step_cvt,
9996 vec<tree> *interm_types)
9998 machine_mode vec_mode;
9999 enum insn_code icode1;
10000 optab optab1, interm_optab;
10001 tree vectype = vectype_in;
10002 tree narrow_vectype = vectype_out;
10003 enum tree_code c1;
10004 tree intermediate_type, prev_type;
10005 machine_mode intermediate_mode, prev_mode;
10006 int i;
10007 bool uns;
10009 *multi_step_cvt = 0;
10010 switch (code)
10012 CASE_CONVERT:
10013 c1 = VEC_PACK_TRUNC_EXPR;
10014 break;
10016 case FIX_TRUNC_EXPR:
10017 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10018 break;
10020 case FLOAT_EXPR:
10021 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10022 tree code and optabs used for computing the operation. */
10023 return false;
10025 default:
10026 gcc_unreachable ();
10029 if (code == FIX_TRUNC_EXPR)
10030 /* The signedness is determined from the output operand. */
10031 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10032 else
10033 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10035 if (!optab1)
10036 return false;
10038 vec_mode = TYPE_MODE (vectype);
10039 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10040 return false;
10042 *code1 = c1;
10044 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10045 /* For scalar masks we may have different boolean
10046 vector types with the same QImode. Thus we
10047 also check the number of elements. */
10048 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10049 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10050 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10052 /* Check if it's a multi-step conversion that can be done using intermediate
10053 types. */
10054 prev_mode = vec_mode;
10055 prev_type = vectype;
10056 if (code == FIX_TRUNC_EXPR)
10057 uns = TYPE_UNSIGNED (vectype_out);
10058 else
10059 uns = TYPE_UNSIGNED (vectype);
10061 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10062 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10063 costly than signed. */
10064 if (code == FIX_TRUNC_EXPR && uns)
10066 enum insn_code icode2;
10068 intermediate_type
10069 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10070 interm_optab
10071 = optab_for_tree_code (c1, intermediate_type, optab_default);
10072 if (interm_optab != unknown_optab
10073 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10074 && insn_data[icode1].operand[0].mode
10075 == insn_data[icode2].operand[0].mode)
10077 uns = false;
10078 optab1 = interm_optab;
10079 icode1 = icode2;
10083 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10084 intermediate steps in the narrowing sequence. We try up to
10085 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10086 interm_types->create (MAX_INTERM_CVT_STEPS);
10087 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10089 intermediate_mode = insn_data[icode1].operand[0].mode;
10090 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10092 intermediate_type = vect_double_mask_nunits (prev_type);
10093 if (intermediate_mode != TYPE_MODE (intermediate_type))
10094 return false;
10096 else
10097 intermediate_type
10098 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10099 interm_optab
10100 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10101 optab_default);
10102 if (!interm_optab
10103 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10104 || insn_data[icode1].operand[0].mode != intermediate_mode
10105 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10106 == CODE_FOR_nothing))
10107 break;
10109 interm_types->quick_push (intermediate_type);
10110 (*multi_step_cvt)++;
10112 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10113 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10114 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10115 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10117 prev_mode = intermediate_mode;
10118 prev_type = intermediate_type;
10119 optab1 = interm_optab;
10122 interm_types->release ();
10123 return false;
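/* Worked example restating the int->short->char case from the comment
   above (a sketch of the outputs only): for a CONVERT from a vector of
   ints to a vector of chars this returns true with
     *code1 = VEC_PACK_TRUNC_EXPR,
     *multi_step_cvt = 1,
   and the intermediate short vector type pushed onto INTERM_TYPES,
   provided the target implements the pack-trunc optabs at each step.  */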
10126 /* Generate and return a statement that sets vector mask MASK such that
10127 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10129 gcall *
10130 vect_gen_while (tree mask, tree start_index, tree end_index)
10132 tree cmp_type = TREE_TYPE (start_index);
10133 tree mask_type = TREE_TYPE (mask);
10134 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10135 cmp_type, mask_type,
10136 OPTIMIZE_FOR_SPEED));
10137 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10138 start_index, end_index,
10139 build_zero_cst (mask_type));
10140 gimple_call_set_lhs (call, mask);
10141 return call;
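/* For illustration only: on a target with the required while_ult
   support the call built above dumps as something like

     mask_1 = .WHILE_ULT (start_2, end_3, { 0, ... });

   where the final zero constant merely conveys the mask type of the
   result; the exact SSA names and constant syntax are hypothetical.  */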
10144 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10145 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10147 tree
10148 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10149 tree end_index)
10151 tree tmp = make_ssa_name (mask_type);
10152 gcall *call = vect_gen_while (tmp, start_index, end_index);
10153 gimple_seq_add_stmt (seq, call);
10154 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
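/* Usage sketch (hypothetical caller code): to build a loop mask and,
   where needed, its complement, a caller could do

     tree mask = make_ssa_name (mask_type);
     gimple_seq_add_stmt (&seq, vect_gen_while (mask, start, end));
     tree not_mask = vect_gen_while_not (&seq, mask_type, start, end);

   with START and END standing for the relevant index bounds and SEQ a
   gimple_seq collecting the generated statements.  */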