gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
125 static tree
126 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
127 tree array, unsigned HOST_WIDE_INT n)
129 tree vect_type, vect, vect_name, array_ref;
130 gimple *new_stmt;
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
144 return vect_name;
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
151 static void
152 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
153 tree array, unsigned HOST_WIDE_INT n)
155 tree array_ref;
156 gimple *new_stmt;
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
170 static tree
171 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 tree mem_ref;
175 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176 /* Arrays have the same alignment as their type. */
177 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
178 return mem_ref;
181 /* Add a clobber of variable VAR to the vectorization of STMT.
182 Emit the clobber before *GSI. */
184 static void
185 vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
187 tree clobber = build_clobber (TREE_TYPE (var));
188 gimple *new_stmt = gimple_build_assign (var, clobber);
189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
192 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
194 /* Function vect_mark_relevant.
196 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
198 static void
199 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
200 enum vect_relevant relevant, bool live_p)
202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
203 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
205 gimple *pattern_stmt;
207 if (dump_enabled_p ())
209 dump_printf_loc (MSG_NOTE, vect_location,
210 "mark relevant %d, live %d: ", relevant, live_p);
211 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
 216    may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
227 if (dump_enabled_p ())
228 dump_printf_loc (MSG_NOTE, vect_location,
229 "last stmt in pattern. don't mark"
230 " relevant/live.\n");
231 stmt_info = vinfo_for_stmt (pattern_stmt);
232 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
233 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
234 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
235 stmt = pattern_stmt;
238 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
239 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
240 STMT_VINFO_RELEVANT (stmt_info) = relevant;
242 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
243 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
245 if (dump_enabled_p ())
246 dump_printf_loc (MSG_NOTE, vect_location,
247 "already marked relevant/live.\n");
248 return;
251 worklist->safe_push (stmt);
255 /* Function is_simple_and_all_uses_invariant
257 Return true if STMT is simple and all uses of it are invariant. */
259 bool
260 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
262 tree op;
263 ssa_op_iter iter;
265 if (!is_gimple_assign (stmt))
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
 294    - it is a control stmt in the loop (other than the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
315 *relevant = vect_used_in_scope;
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE, vect_location,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant = vect_used_in_scope;
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
348 *live_p = true;
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
362 return (*live_p || *relevant);
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
371 static bool
372 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
 383   /* STMT has a data_ref. FORNOW this means that it's one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
391 for array indexing.
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt))
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
401 internal_fn ifn = gimple_call_internal_fn (stmt);
402 int mask_index = internal_fn_mask_index (ifn);
403 if (mask_index >= 0
404 && use == gimple_call_arg (stmt, mask_index))
405 return true;
406 int stored_value_index = internal_fn_stored_value_index (ifn);
407 if (stored_value_index >= 0
408 && use == gimple_call_arg (stmt, stored_value_index))
409 return true;
410 if (internal_gather_scatter_fn_p (ifn)
411 && use == gimple_call_arg (stmt, 1))
412 return true;
414 return false;
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
419 operand = gimple_assign_rhs1 (stmt);
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
423 if (operand == use)
424 return true;
426 return false;
431 Function process_use.
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 be performed.
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 451    skip DEF_STMT because it has already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
457 static bool
458 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
459 enum vect_relevant relevant, vec<gimple *> *worklist,
460 bool force)
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
466 gimple *def_stmt;
467 enum vect_def_type dt;
469 /* case 1: we are only interested in uses that need to be vectorized. Uses
470 that are used for address computation are not considered relevant. */
471 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
472 return true;
474 if (!vect_is_simple_use (use, loop_vinfo, &dt, &def_stmt))
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
478 "not vectorized: unsupported use in stmt.\n");
479 return false;
482 if (!def_stmt || gimple_nop_p (def_stmt))
483 return true;
485 def_bb = gimple_bb (def_stmt);
486 if (!flow_bb_inside_loop_p (loop, def_bb))
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
490 return true;
493 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
494 DEF_STMT must have already been processed, because this should be the
495 only way that STMT, which is a reduction-phi, was put in the worklist,
496 as there should be no other uses for DEF_STMT in the loop. So we just
497 check that everything is as expected, and we are done. */
498 dstmt_vinfo = vinfo_for_stmt (def_stmt);
499 bb = gimple_bb (stmt);
500 if (gimple_code (stmt) == GIMPLE_PHI
501 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
502 && gimple_code (def_stmt) != GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
504 && bb->loop_father == def_bb->loop_father)
506 if (dump_enabled_p ())
507 dump_printf_loc (MSG_NOTE, vect_location,
508 "reduc-stmt defining reduc-phi in the same nest.\n");
509 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
510 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
511 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
512 return true;
515 /* case 3a: outer-loop stmt defining an inner-loop stmt:
516 outer-loop-header-bb:
517 d = def_stmt
518 inner-loop:
519 stmt # use (d)
520 outer-loop-tail-bb:
521 ... */
522 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
524 if (dump_enabled_p ())
525 dump_printf_loc (MSG_NOTE, vect_location,
526 "outer-loop def-stmt defining inner-loop stmt.\n");
528 switch (relevant)
530 case vect_unused_in_scope:
531 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
532 vect_used_in_scope : vect_unused_in_scope;
533 break;
535 case vect_used_in_outer_by_reduction:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_by_reduction;
538 break;
540 case vect_used_in_outer:
541 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
542 relevant = vect_used_in_scope;
543 break;
545 case vect_used_in_scope:
546 break;
548 default:
549 gcc_unreachable ();
553 /* case 3b: inner-loop stmt defining an outer-loop stmt:
554 outer-loop-header-bb:
556 inner-loop:
557 d = def_stmt
558 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
559 stmt # use (d) */
560 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
562 if (dump_enabled_p ())
563 dump_printf_loc (MSG_NOTE, vect_location,
564 "inner-loop def-stmt defining outer-loop stmt.\n");
566 switch (relevant)
568 case vect_unused_in_scope:
569 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
570 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
571 vect_used_in_outer_by_reduction : vect_unused_in_scope;
572 break;
574 case vect_used_by_reduction:
575 case vect_used_only_live:
576 relevant = vect_used_in_outer_by_reduction;
577 break;
579 case vect_used_in_scope:
580 relevant = vect_used_in_outer;
581 break;
583 default:
584 gcc_unreachable ();
587 /* We are also not interested in uses on loop PHI backedges that are
588 inductions. Otherwise we'll needlessly vectorize the IV increment
589 and cause hybrid SLP for SLP inductions. Unless the PHI is live
590 of course. */
591 else if (gimple_code (stmt) == GIMPLE_PHI
592 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
593 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
594 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
595 == use))
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE, vect_location,
599 "induction value on backedge.\n");
600 return true;
604 vect_mark_relevant (worklist, def_stmt, relevant, false);
605 return true;
609 /* Function vect_mark_stmts_to_be_vectorized.
611 Not all stmts in the loop need to be vectorized. For example:
613 for i...
614 for j...
615 1. T0 = i + j
616 2. T1 = a[T0]
618 3. j = j + 1
 620    Stmts 1 and 3 do not need to be vectorized, because loop control and
621 addressing of vectorized data-refs are handled differently.
623 This pass detects such stmts. */
625 bool
626 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
628 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
629 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
630 unsigned int nbbs = loop->num_nodes;
631 gimple_stmt_iterator si;
632 gimple *stmt;
633 unsigned int i;
634 stmt_vec_info stmt_vinfo;
635 basic_block bb;
636 gimple *phi;
637 bool live_p;
638 enum vect_relevant relevant;
640 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
642 auto_vec<gimple *, 64> worklist;
644 /* 1. Init worklist. */
645 for (i = 0; i < nbbs; i++)
647 bb = bbs[i];
648 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
650 phi = gsi_stmt (si);
651 if (dump_enabled_p ())
653 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
654 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
657 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
658 vect_mark_relevant (&worklist, phi, relevant, live_p);
660 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt = gsi_stmt (si);
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
669 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
670 vect_mark_relevant (&worklist, stmt, relevant, live_p);
674 /* 2. Process_worklist */
675 while (worklist.length () > 0)
677 use_operand_p use_p;
678 ssa_op_iter iter;
680 stmt = worklist.pop ();
681 if (dump_enabled_p ())
683 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
687 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
688 (DEF_STMT) as relevant/irrelevant according to the relevance property
689 of STMT. */
690 stmt_vinfo = vinfo_for_stmt (stmt);
691 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
693 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
694 propagated as is to the DEF_STMTs of its USEs.
696 One exception is when STMT has been identified as defining a reduction
697 variable; in this case we set the relevance to vect_used_by_reduction.
698 This is because we distinguish between two kinds of relevant stmts -
699 those that are used by a reduction computation, and those that are
700 (also) used by a regular computation. This allows us later on to
701 identify stmts that are used solely by a reduction, and therefore the
702 order of the results that they produce does not have to be kept. */
704 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
706 case vect_reduction_def:
707 gcc_assert (relevant != vect_unused_in_scope);
708 if (relevant != vect_unused_in_scope
709 && relevant != vect_used_in_scope
710 && relevant != vect_used_by_reduction
711 && relevant != vect_used_only_live)
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
715 "unsupported use of reduction.\n");
716 return false;
718 break;
720 case vect_nested_cycle:
721 if (relevant != vect_unused_in_scope
722 && relevant != vect_used_in_outer_by_reduction
723 && relevant != vect_used_in_outer)
725 if (dump_enabled_p ())
726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
727 "unsupported use of nested cycle.\n");
729 return false;
731 break;
733 case vect_double_reduction_def:
734 if (relevant != vect_unused_in_scope
735 && relevant != vect_used_by_reduction
736 && relevant != vect_used_only_live)
738 if (dump_enabled_p ())
739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
740 "unsupported use of double reduction.\n");
742 return false;
744 break;
746 default:
747 break;
750 if (is_pattern_stmt_p (stmt_vinfo))
752 /* Pattern statements are not inserted into the code, so
753 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
754 have to scan the RHS or function arguments instead. */
755 if (is_gimple_assign (stmt))
757 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
758 tree op = gimple_assign_rhs1 (stmt);
760 i = 1;
761 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
763 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
764 relevant, &worklist, false)
765 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
766 relevant, &worklist, false))
767 return false;
768 i = 2;
770 for (; i < gimple_num_ops (stmt); i++)
772 op = gimple_op (stmt, i);
773 if (TREE_CODE (op) == SSA_NAME
774 && !process_use (stmt, op, loop_vinfo, relevant,
775 &worklist, false))
776 return false;
779 else if (is_gimple_call (stmt))
781 for (i = 0; i < gimple_call_num_args (stmt); i++)
783 tree arg = gimple_call_arg (stmt, i);
784 if (!process_use (stmt, arg, loop_vinfo, relevant,
785 &worklist, false))
786 return false;
790 else
791 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
793 tree op = USE_FROM_PTR (use_p);
794 if (!process_use (stmt, op, loop_vinfo, relevant,
795 &worklist, false))
796 return false;
799 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
801 gather_scatter_info gs_info;
802 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
803 gcc_unreachable ();
804 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
805 &worklist, true))
806 return false;
808 } /* while worklist */
810 return true;
813 /* Compute the prologue cost for invariant or constant operands. */
815 static unsigned
816 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
817 unsigned opno, enum vect_def_type dt,
818 stmt_vector_for_cost *cost_vec)
820 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
821 tree op = gimple_op (stmt, opno);
822 unsigned prologue_cost = 0;
 824   /* Without looking at the actual initializer, a vector of
 825      constants can be implemented as a load from the constant pool.
 826      When all elements are the same we can use a splat.  */
827 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
828 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
829 unsigned num_vects_to_check;
830 unsigned HOST_WIDE_INT const_nunits;
831 unsigned nelt_limit;
832 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
833 && ! multiple_p (const_nunits, group_size))
835 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
836 nelt_limit = const_nunits;
838 else
840 /* If either the vector has variable length or the vectors
841 are composed of repeated whole groups we only need to
842 cost construction once. All vectors will be the same. */
843 num_vects_to_check = 1;
844 nelt_limit = group_size;
846 tree elt = NULL_TREE;
847 unsigned nelt = 0;
848 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
850 unsigned si = j % group_size;
851 if (nelt == 0)
852 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
853 /* ??? We're just tracking whether all operands of a single
854 vector initializer are the same, ideally we'd check if
855 we emitted the same one already. */
856 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
857 opno))
858 elt = NULL_TREE;
859 nelt++;
860 if (nelt == nelt_limit)
862 /* ??? We need to pass down stmt_info for a vector type
863 even if it points to the wrong stmt. */
864 prologue_cost += record_stmt_cost
865 (cost_vec, 1,
866 dt == vect_external_def
867 ? (elt ? scalar_to_vec : vec_construct)
868 : vector_load,
869 stmt_info, 0, vect_prologue);
870 nelt = 0;
874 return prologue_cost;
877 /* Function vect_model_simple_cost.
879 Models cost for simple operations, i.e. those that only emit ncopies of a
880 single op. Right now, this does not account for multiple insns that could
881 be generated for the single vector op. We will handle that shortly. */
883 static void
884 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
885 enum vect_def_type *dt,
886 int ndts,
887 slp_tree node,
888 stmt_vector_for_cost *cost_vec)
890 int inside_cost = 0, prologue_cost = 0;
892 gcc_assert (cost_vec != NULL);
894 /* ??? Somehow we need to fix this at the callers. */
895 if (node)
896 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
898 if (node)
900 /* Scan operands and account for prologue cost of constants/externals.
901 ??? This over-estimates cost for multiple uses and should be
902 re-engineered. */
903 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
904 tree lhs = gimple_get_lhs (stmt);
905 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
907 tree op = gimple_op (stmt, i);
908 enum vect_def_type dt;
909 if (!op || op == lhs)
910 continue;
911 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
912 && (dt == vect_constant_def || dt == vect_external_def))
913 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
914 i, dt, cost_vec);
917 else
 918     /* Cost the "broadcast" of a scalar operand into a vector operand.
919 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
920 cost model. */
921 for (int i = 0; i < ndts; i++)
922 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
923 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
924 stmt_info, 0, vect_prologue);
926 /* Adjust for two-operator SLP nodes. */
927 if (node && SLP_TREE_TWO_OPERATORS (node))
929 ncopies *= 2;
930 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
931 stmt_info, 0, vect_body);
934 /* Pass the inside-of-loop statements to the target-specific cost model. */
935 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
936 stmt_info, 0, vect_body);
938 if (dump_enabled_p ())
939 dump_printf_loc (MSG_NOTE, vect_location,
940 "vect_model_simple_cost: inside_cost = %d, "
941 "prologue_cost = %d .\n", inside_cost, prologue_cost);
945 /* Model cost for type demotion and promotion operations. PWR is normally
946 zero for single-step promotions and demotions. It will be one if
947 two-step promotion/demotion is required, and so on. Each additional
948 step doubles the number of instructions required. */
950 static void
951 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
952 enum vect_def_type *dt, int pwr,
953 stmt_vector_for_cost *cost_vec)
955 int i, tmp;
956 int inside_cost = 0, prologue_cost = 0;
958 for (i = 0; i < pwr + 1; i++)
960 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
961 (i + 1) : i;
962 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
963 vec_promote_demote, stmt_info, 0,
964 vect_body);
 967   /* FORNOW: Assuming maximum 2 args per stmt.  */
968 for (i = 0; i < 2; i++)
969 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
970 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
971 stmt_info, 0, vect_prologue);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_promotion_demotion_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
979 /* Function vect_model_store_cost
981 Models cost for stores. In the case of grouped accesses, one access
982 has the overhead of the grouped access attributed to it. */
984 static void
985 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
986 enum vect_def_type dt,
987 vect_memory_access_type memory_access_type,
988 vec_load_store_type vls_type, slp_tree slp_node,
989 stmt_vector_for_cost *cost_vec)
991 unsigned int inside_cost = 0, prologue_cost = 0;
992 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
993 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
995 /* ??? Somehow we need to fix this at the callers. */
996 if (slp_node)
997 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
999 if (vls_type == VLS_STORE_INVARIANT)
1001 if (slp_node)
1002 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1003 1, dt, cost_vec);
1004 else
1005 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1006 stmt_info, 0, vect_prologue);
1009 /* Grouped stores update all elements in the group at once,
1010 so we want the DR for the first statement. */
1011 if (!slp_node && grouped_access_p)
1012 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1014 /* True if we should include any once-per-group costs as well as
1015 the cost of the statement itself. For SLP we only get called
1016 once per group anyhow. */
1017 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1019 /* We assume that the cost of a single store-lanes instruction is
1020 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1021 access is instead being provided by a permute-and-store operation,
1022 include the cost of the permutes. */
1023 if (first_stmt_p
1024 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1026       /* Uses high and low interleave or shuffle operations for each
1027 needed permute. */
1028 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
1029 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1030 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1031 stmt_info, 0, vect_body);
1033 if (dump_enabled_p ())
1034 dump_printf_loc (MSG_NOTE, vect_location,
1035 "vect_model_store_cost: strided group_size = %d .\n",
1036 group_size);
1039 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1040 /* Costs of the stores. */
1041 if (memory_access_type == VMAT_ELEMENTWISE
1042 || memory_access_type == VMAT_GATHER_SCATTER)
1044 /* N scalar stores plus extracting the elements. */
1045 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1046 inside_cost += record_stmt_cost (cost_vec,
1047 ncopies * assumed_nunits,
1048 scalar_store, stmt_info, 0, vect_body);
1050 else
1051 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1053 if (memory_access_type == VMAT_ELEMENTWISE
1054 || memory_access_type == VMAT_STRIDED_SLP)
1056 /* N scalar stores plus extracting the elements. */
1057 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1058 inside_cost += record_stmt_cost (cost_vec,
1059 ncopies * assumed_nunits,
1060 vec_to_scalar, stmt_info, 0, vect_body);
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "vect_model_store_cost: inside_cost = %d, "
1066 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1070 /* Calculate cost of DR's memory access. */
1071 void
1072 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1073 unsigned int *inside_cost,
1074 stmt_vector_for_cost *body_cost_vec)
1076 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1077 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1079 switch (alignment_support_scheme)
1081 case dr_aligned:
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 vector_store, stmt_info, 0,
1085 vect_body);
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_store_cost: aligned.\n");
1090 break;
1093 case dr_unaligned_supported:
1095 /* Here, we assign an additional cost for the unaligned store. */
1096 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1097 unaligned_store, stmt_info,
1098 DR_MISALIGNMENT (dr), vect_body);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE, vect_location,
1101 "vect_model_store_cost: unaligned supported by "
1102 "hardware.\n");
1103 break;
1106 case dr_unaligned_unsupported:
1108 *inside_cost = VECT_MAX_COST;
1110 if (dump_enabled_p ())
1111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1112 "vect_model_store_cost: unsupported access.\n");
1113 break;
1116 default:
1117 gcc_unreachable ();
1122 /* Function vect_model_load_cost
1124 Models cost for loads. In the case of grouped accesses, one access has
1125 the overhead of the grouped access attributed to it. Since unaligned
1126 accesses are supported for loads, we also account for the costs of the
1127 access scheme chosen. */
1129 static void
1130 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1131 vect_memory_access_type memory_access_type,
1132 slp_instance instance,
1133 slp_tree slp_node,
1134 stmt_vector_for_cost *cost_vec)
1136 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1137 unsigned int inside_cost = 0, prologue_cost = 0;
1138 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1140 gcc_assert (cost_vec);
1142 /* ??? Somehow we need to fix this at the callers. */
1143 if (slp_node)
1144 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1146 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1148 /* If the load is permuted then the alignment is determined by
1149          the first group element, not by the first scalar stmt DR.  */
1150 gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1152 /* Record the cost for the permutation. */
1153 unsigned n_perms;
1154 unsigned assumed_nunits
1155 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
1156 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1157 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1158 slp_vf, instance, true,
1159 &n_perms);
1160 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1161 stmt_info, 0, vect_body);
1162 /* And adjust the number of loads performed. This handles
1163 redundancies as well as loads that are later dead. */
1164 auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
1165 bitmap_clear (perm);
1166 for (unsigned i = 0;
1167 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1168 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1169 ncopies = 0;
1170 bool load_seen = false;
1171 for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
1173 if (i % assumed_nunits == 0)
1175 if (load_seen)
1176 ncopies++;
1177 load_seen = false;
1179 if (bitmap_bit_p (perm, i))
1180 load_seen = true;
1182 if (load_seen)
1183 ncopies++;
1184 gcc_assert (ncopies
1185 <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
1186 + assumed_nunits - 1) / assumed_nunits);
1189 /* Grouped loads read all elements in the group at once,
1190 so we want the DR for the first statement. */
1191 if (!slp_node && grouped_access_p)
1192 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1194 /* True if we should include any once-per-group costs as well as
1195 the cost of the statement itself. For SLP we only get called
1196 once per group anyhow. */
1197 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1199 /* We assume that the cost of a single load-lanes instruction is
1200 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1201 access is instead being provided by a load-and-permute operation,
1202 include the cost of the permutes. */
1203 if (first_stmt_p
1204 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1206       /* Uses even and odd extract operations or shuffle operations
1207 for each needed permute. */
1208 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
1209 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1210 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: strided group_size = %d .\n",
1216 group_size);
1219 /* The loads themselves. */
1220 if (memory_access_type == VMAT_ELEMENTWISE
1221 || memory_access_type == VMAT_GATHER_SCATTER)
1223 /* N scalar loads plus gathering them into a vector. */
1224 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1225 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1226 inside_cost += record_stmt_cost (cost_vec,
1227 ncopies * assumed_nunits,
1228 scalar_load, stmt_info, 0, vect_body);
1230 else
1231 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1232 &inside_cost, &prologue_cost,
1233 cost_vec, cost_vec, true);
1234 if (memory_access_type == VMAT_ELEMENTWISE
1235 || memory_access_type == VMAT_STRIDED_SLP)
1236 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1237 stmt_info, 0, vect_body);
1239 if (dump_enabled_p ())
1240 dump_printf_loc (MSG_NOTE, vect_location,
1241 "vect_model_load_cost: inside_cost = %d, "
1242 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1246 /* Calculate cost of DR's memory access. */
1247 void
1248 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1249 bool add_realign_cost, unsigned int *inside_cost,
1250 unsigned int *prologue_cost,
1251 stmt_vector_for_cost *prologue_cost_vec,
1252 stmt_vector_for_cost *body_cost_vec,
1253 bool record_prologue_costs)
1255 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1256 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1258 switch (alignment_support_scheme)
1260 case dr_aligned:
1262 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1263 stmt_info, 0, vect_body);
1265 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "vect_model_load_cost: aligned.\n");
1269 break;
1271 case dr_unaligned_supported:
1273 /* Here, we assign an additional cost for the unaligned load. */
1274 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1275 unaligned_load, stmt_info,
1276 DR_MISALIGNMENT (dr), vect_body);
1278 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE, vect_location,
1280 "vect_model_load_cost: unaligned supported by "
1281 "hardware.\n");
1283 break;
1285 case dr_explicit_realign:
1287 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1288 vector_load, stmt_info, 0, vect_body);
1289 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1290 vec_perm, stmt_info, 0, vect_body);
1292 /* FIXME: If the misalignment remains fixed across the iterations of
1293 the containing loop, the following cost should be added to the
1294 prologue costs. */
1295 if (targetm.vectorize.builtin_mask_for_load)
1296 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1297 stmt_info, 0, vect_body);
1299 if (dump_enabled_p ())
1300 dump_printf_loc (MSG_NOTE, vect_location,
1301 "vect_model_load_cost: explicit realign\n");
1303 break;
1305 case dr_explicit_realign_optimized:
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_NOTE, vect_location,
1309 "vect_model_load_cost: unaligned software "
1310 "pipelined.\n");
1312       /* An unaligned software pipeline has a load of an address, an initial
1313 load, and possibly a mask operation to "prime" the loop. However,
1314 if this is an access in a group of loads, which provide grouped
1315 access, then the above cost should only be considered for one
1316 access in the group. Inside the loop, there is a load op
1317 and a realignment op. */
1319 if (add_realign_cost && record_prologue_costs)
1321 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1322 vector_stmt, stmt_info,
1323 0, vect_prologue);
1324 if (targetm.vectorize.builtin_mask_for_load)
1325 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1326 vector_stmt, stmt_info,
1327 0, vect_prologue);
1330 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1331 stmt_info, 0, vect_body);
1332 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1333 stmt_info, 0, vect_body);
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE, vect_location,
1337 "vect_model_load_cost: explicit realign optimized"
1338 "\n");
1340 break;
1343 case dr_unaligned_unsupported:
1345 *inside_cost = VECT_MAX_COST;
1347 if (dump_enabled_p ())
1348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1349 "vect_model_load_cost: unsupported access.\n");
1350 break;
1353 default:
1354 gcc_unreachable ();
1358 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1359 the loop preheader for the vectorized stmt STMT. */
1361 static void
1362 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1364 if (gsi)
1365 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1366 else
1368 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1369 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1371 if (loop_vinfo)
1373 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1374 basic_block new_bb;
1375 edge pe;
1377 if (nested_in_vect_loop_p (loop, stmt))
1378 loop = loop->inner;
1380 pe = loop_preheader_edge (loop);
1381 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1382 gcc_assert (!new_bb);
1384 else
1386 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1387 basic_block bb;
1388 gimple_stmt_iterator gsi_bb_start;
1390 gcc_assert (bb_vinfo);
1391 bb = BB_VINFO_BB (bb_vinfo);
1392 gsi_bb_start = gsi_after_labels (bb);
1393 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1397 if (dump_enabled_p ())
1399 dump_printf_loc (MSG_NOTE, vect_location,
1400 "created new init_stmt: ");
1401 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1405 /* Function vect_init_vector.
1407 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1408 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1409 vector type a vector with all elements equal to VAL is created first.
1410    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1411 initialization at the loop preheader.
1412 Return the DEF of INIT_STMT.
1413 It will be used in the vectorization of STMT. */
1415 tree
1416 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1418 gimple *init_stmt;
1419 tree new_temp;
1421   /* We abuse this function to push something to an SSA name with initial value 'val'.  */
1422 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1424 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1425 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1427       /* A scalar boolean value should be transformed into an
1428          all-zeros or all-ones value before building a vector.  */
1429 if (VECTOR_BOOLEAN_TYPE_P (type))
1431 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1432 tree false_val = build_zero_cst (TREE_TYPE (type));
1434 if (CONSTANT_CLASS_P (val))
1435 val = integer_zerop (val) ? false_val : true_val;
1436 else
1438 new_temp = make_ssa_name (TREE_TYPE (type));
1439 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1440 val, true_val, false_val);
1441 vect_init_vector_1 (stmt, init_stmt, gsi);
1442 val = new_temp;
1445 else if (CONSTANT_CLASS_P (val))
1446 val = fold_convert (TREE_TYPE (type), val);
1447 else
1449 new_temp = make_ssa_name (TREE_TYPE (type));
1450 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1451 init_stmt = gimple_build_assign (new_temp,
1452 fold_build1 (VIEW_CONVERT_EXPR,
1453 TREE_TYPE (type),
1454 val));
1455 else
1456 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1457 vect_init_vector_1 (stmt, init_stmt, gsi);
1458 val = new_temp;
1461 val = build_vector_from_val (type, val);
1464 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1465 init_stmt = gimple_build_assign (new_temp, val);
1466 vect_init_vector_1 (stmt, init_stmt, gsi);
1467 return new_temp;
1470 /* Function vect_get_vec_def_for_operand_1.
1472 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1473 DT that will be used in the vectorized stmt. */
1475 tree
1476 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1478 tree vec_oprnd;
1479 gimple *vec_stmt;
1480 stmt_vec_info def_stmt_info = NULL;
1482 switch (dt)
1484 /* operand is a constant or a loop invariant. */
1485 case vect_constant_def:
1486 case vect_external_def:
1487 /* Code should use vect_get_vec_def_for_operand. */
1488 gcc_unreachable ();
1490 /* operand is defined inside the loop. */
1491 case vect_internal_def:
1493 /* Get the def from the vectorized stmt. */
1494 def_stmt_info = vinfo_for_stmt (def_stmt);
1496 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1497 /* Get vectorized pattern statement. */
1498 if (!vec_stmt
1499 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1500 && !STMT_VINFO_RELEVANT (def_stmt_info))
1501 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1502 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1503 gcc_assert (vec_stmt);
1504 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1505 vec_oprnd = PHI_RESULT (vec_stmt);
1506 else if (is_gimple_call (vec_stmt))
1507 vec_oprnd = gimple_call_lhs (vec_stmt);
1508 else
1509 vec_oprnd = gimple_assign_lhs (vec_stmt);
1510 return vec_oprnd;
1513 /* operand is defined by a loop header phi. */
1514 case vect_reduction_def:
1515 case vect_double_reduction_def:
1516 case vect_nested_cycle:
1517 case vect_induction_def:
1519 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1521 /* Get the def from the vectorized stmt. */
1522 def_stmt_info = vinfo_for_stmt (def_stmt);
1523 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1524 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1525 vec_oprnd = PHI_RESULT (vec_stmt);
1526 else
1527 vec_oprnd = gimple_get_lhs (vec_stmt);
1528 return vec_oprnd;
1531 default:
1532 gcc_unreachable ();
1537 /* Function vect_get_vec_def_for_operand.
1539 OP is an operand in STMT. This function returns a (vector) def that will be
1540 used in the vectorized stmt for STMT.
1542 In the case that OP is an SSA_NAME which is defined in the loop, then
1543 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1545 In case OP is an invariant or constant, a new stmt that creates a vector def
1546 needs to be introduced. VECTYPE may be used to specify a required type for
1547 vector invariant. */
1549 tree
1550 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1552 gimple *def_stmt;
1553 enum vect_def_type dt;
1554 bool is_simple_use;
1555 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1558 if (dump_enabled_p ())
1560 dump_printf_loc (MSG_NOTE, vect_location,
1561 "vect_get_vec_def_for_operand: ");
1562 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1563 dump_printf (MSG_NOTE, "\n");
1566 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, &def_stmt);
1567 gcc_assert (is_simple_use);
1568 if (def_stmt && dump_enabled_p ())
1570 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1574 if (dt == vect_constant_def || dt == vect_external_def)
1576 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1577 tree vector_type;
1579 if (vectype)
1580 vector_type = vectype;
1581 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1582 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1583 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1584 else
1585 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1587 gcc_assert (vector_type);
1588 return vect_init_vector (stmt, op, vector_type, NULL);
1590 else
1591 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1595 /* Function vect_get_vec_def_for_stmt_copy
1597 Return a vector-def for an operand. This function is used when the
1598 vectorized stmt to be created (by the caller to this function) is a "copy"
1599 created in case the vectorized result cannot fit in one vector, and several
1600 copies of the vector-stmt are required. In this case the vector-def is
1601 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1602 of the stmt that defines VEC_OPRND.
1603 DT is the type of the vector def VEC_OPRND.
1605 Context:
1606 In case the vectorization factor (VF) is bigger than the number
1607 of elements that can fit in a vectype (nunits), we have to generate
1608 more than one vector stmt to vectorize the scalar stmt. This situation
1609 arises when there are multiple data-types operated upon in the loop; the
1610 smallest data-type determines the VF, and as a result, when vectorizing
1611 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1612 vector stmt (each computing a vector of 'nunits' results, and together
1613 computing 'VF' results in each iteration). This function is called when
1614 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1615 which VF=16 and nunits=4, so the number of copies required is 4):
1617 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1619 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1620 VS1.1: vx.1 = memref1 VS1.2
1621 VS1.2: vx.2 = memref2 VS1.3
1622 VS1.3: vx.3 = memref3
1624 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1625 VSnew.1: vz1 = vx.1 + ... VSnew.2
1626 VSnew.2: vz2 = vx.2 + ... VSnew.3
1627 VSnew.3: vz3 = vx.3 + ...
1629 The vectorization of S1 is explained in vectorizable_load.
1630 The vectorization of S2:
1631 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1632 the function 'vect_get_vec_def_for_operand' is called to
1633 get the relevant vector-def for each operand of S2. For operand x it
1634 returns the vector-def 'vx.0'.
1636 To create the remaining copies of the vector-stmt (VSnew.j), this
1637 function is called to get the relevant vector-def for each operand. It is
1638 obtained from the respective VS1.j stmt, which is recorded in the
1639 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1641 For example, to obtain the vector-def 'vx.1' in order to create the
1642 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1643 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1644 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1645 and return its def ('vx.1').
1646 Overall, to create the above sequence this function will be called 3 times:
1647 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1648 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1649 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1651 tree
1652 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1654 gimple *vec_stmt_for_operand;
1655 stmt_vec_info def_stmt_info;
1657 /* Do nothing; can reuse same def. */
1658 if (dt == vect_external_def || dt == vect_constant_def )
1659 return vec_oprnd;
1661 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1662 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1663 gcc_assert (def_stmt_info);
1664 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1665 gcc_assert (vec_stmt_for_operand);
1666 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1667 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1668 else
1669 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1670 return vec_oprnd;
1674 /* Get vectorized definitions for the operands to create a copy of an original
1675 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1677 void
1678 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1679 vec<tree> *vec_oprnds0,
1680 vec<tree> *vec_oprnds1)
1682 tree vec_oprnd = vec_oprnds0->pop ();
1684 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1685 vec_oprnds0->quick_push (vec_oprnd);
1687 if (vec_oprnds1 && vec_oprnds1->length ())
1689 vec_oprnd = vec_oprnds1->pop ();
1690 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1691 vec_oprnds1->quick_push (vec_oprnd);
1696 /* Get vectorized definitions for OP0 and OP1. */
1698 void
1699 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1700 vec<tree> *vec_oprnds0,
1701 vec<tree> *vec_oprnds1,
1702 slp_tree slp_node)
1704 if (slp_node)
1706 int nops = (op1 == NULL_TREE) ? 1 : 2;
1707 auto_vec<tree> ops (nops);
1708 auto_vec<vec<tree> > vec_defs (nops);
1710 ops.quick_push (op0);
1711 if (op1)
1712 ops.quick_push (op1);
1714 vect_get_slp_defs (ops, slp_node, &vec_defs);
1716 *vec_oprnds0 = vec_defs[0];
1717 if (op1)
1718 *vec_oprnds1 = vec_defs[1];
1720 else
1722 tree vec_oprnd;
1724 vec_oprnds0->create (1);
1725 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1726 vec_oprnds0->quick_push (vec_oprnd);
1728 if (op1)
1730 vec_oprnds1->create (1);
1731 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1732 vec_oprnds1->quick_push (vec_oprnd);
1737 /* Helper function called by vect_finish_replace_stmt and
1738 vect_finish_stmt_generation. Set the location of the new
1739 statement and create a stmt_vec_info for it. */
1741 static void
1742 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1747 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1749 if (dump_enabled_p ())
1751 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1755 gimple_set_location (vec_stmt, gimple_location (stmt));
1757 /* While EH edges will generally prevent vectorization, stmt might
1758 e.g. be in a must-not-throw region. Ensure newly created stmts
1759 that could throw are part of the same region. */
1760 int lp_nr = lookup_stmt_eh_lp (stmt);
1761 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1762 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1765 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1766 which sets the same scalar result as STMT did. */
1768 void
1769 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1771 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1773 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1774 gsi_replace (&gsi, vec_stmt, false);
1776 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1779 /* Function vect_finish_stmt_generation.
1781 Insert a new stmt. */
1783 void
1784 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1785 gimple_stmt_iterator *gsi)
1787 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1789 if (!gsi_end_p (*gsi)
1790 && gimple_has_mem_ops (vec_stmt))
1792 gimple *at_stmt = gsi_stmt (*gsi);
1793 tree vuse = gimple_vuse (at_stmt);
1794 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1796 tree vdef = gimple_vdef (at_stmt);
1797 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1798 /* If we have an SSA vuse and insert a store, update virtual
1799 SSA form to avoid triggering the renamer. Do so only
1800 if we can easily see all uses - which is what almost always
1801 happens with the way vectorized stmts are inserted. */
1802 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1803 && ((is_gimple_assign (vec_stmt)
1804 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1805 || (is_gimple_call (vec_stmt)
1806 && !(gimple_call_flags (vec_stmt)
1807 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1809 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1810 gimple_set_vdef (vec_stmt, new_vdef);
1811 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1815 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1816 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1819 /* We want to vectorize a call to combined function CFN with function
1820 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1821 as the types of all inputs. Check whether this is possible using
1822 an internal function, returning its code if so or IFN_LAST if not. */
1824 static internal_fn
1825 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1826 tree vectype_out, tree vectype_in)
1828 internal_fn ifn;
1829 if (internal_fn_p (cfn))
1830 ifn = as_internal_fn (cfn);
1831 else
1832 ifn = associated_internal_fn (fndecl);
1833 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1835 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1836 if (info.vectorizable)
1838 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1839 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1840 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1841 OPTIMIZE_FOR_SPEED))
1842 return ifn;
1845 return IFN_LAST;
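/* Illustrative example (an assumption, not taken from this file): for a call
   to sqrt with V2DF input and output vector types, CFN_SQRT maps to the
   direct internal function IFN_SQRT, so the helper above returns IFN_SQRT
   when the target provides the corresponding vector optab for V2DF and
   IFN_LAST otherwise.  */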
1849 static tree permute_vec_elements (tree, tree, tree, gimple *,
1850 gimple_stmt_iterator *);
1852 /* Check whether a load or store statement in the loop described by
1853 LOOP_VINFO is possible in a fully-masked loop. This is testing
1854 whether the vectorizer pass has the appropriate support, as well as
1855 whether the target does.
1857 VLS_TYPE says whether the statement is a load or store and VECTYPE
1858 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1859 says how the load or store is going to be implemented and GROUP_SIZE
1860 is the number of load or store statements in the containing group.
1861 If the access is a gather load or scatter store, GS_INFO describes
1862 its arguments.
1864 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1865 supported, otherwise record the required mask types. */
1867 static void
1868 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1869 vec_load_store_type vls_type, int group_size,
1870 vect_memory_access_type memory_access_type,
1871 gather_scatter_info *gs_info)
1873 /* Invariant loads need no special support. */
1874 if (memory_access_type == VMAT_INVARIANT)
1875 return;
1877 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1878 machine_mode vecmode = TYPE_MODE (vectype);
1879 bool is_load = (vls_type == VLS_LOAD);
1880 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1882 if (is_load
1883 ? !vect_load_lanes_supported (vectype, group_size, true)
1884 : !vect_store_lanes_supported (vectype, group_size, true))
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "can't use a fully-masked loop because the"
1889 " target doesn't have an appropriate masked"
1890 " load/store-lanes instruction.\n");
1891 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1892 return;
1894 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1895 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1896 return;
1899 if (memory_access_type == VMAT_GATHER_SCATTER)
1901 internal_fn ifn = (is_load
1902 ? IFN_MASK_GATHER_LOAD
1903 : IFN_MASK_SCATTER_STORE);
1904 tree offset_type = TREE_TYPE (gs_info->offset);
1905 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1906 gs_info->memory_type,
1907 TYPE_SIGN (offset_type),
1908 gs_info->scale))
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "can't use a fully-masked loop because the"
1913 " target doesn't have an appropriate masked"
1914 " gather load or scatter store instruction.\n");
1915 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1916 return;
1918 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1919 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1920 return;
1923 if (memory_access_type != VMAT_CONTIGUOUS
1924 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1926 /* Element X of the data must come from iteration i * VF + X of the
1927 scalar loop. We need more work to support other mappings. */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1930 "can't use a fully-masked loop because an access"
1931 " isn't contiguous.\n");
1932 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1933 return;
1936 machine_mode mask_mode;
1937 if (!(targetm.vectorize.get_mask_mode
1938 (GET_MODE_NUNITS (vecmode),
1939 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1940 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "can't use a fully-masked loop because the target"
1945 " doesn't have the appropriate masked load or"
1946 " store.\n");
1947 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1948 return;
1950 /* We might load more scalars than we need for permuting SLP loads.
1951 We checked in get_group_load_store_type that the extra elements
1952 don't leak into a new vector. */
1953 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1954 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1955 unsigned int nvectors;
1956 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1957 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1958 else
1959 gcc_unreachable ();
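/* Worked example (illustrative): with a group of two contiguous loads, a
   vectorization factor of 4 and eight elements per vector, the final branch
   above records a requirement for ceil (2 * 4 / 8) = 1 mask of the given
   vector type via vect_record_loop_mask.  */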
1962 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1963 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1964 that needs to be applied to all loads and stores in a vectorized loop.
1965 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1967 MASK_TYPE is the type of both masks. If new statements are needed,
1968 insert them before GSI. */
1970 static tree
1971 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1972 gimple_stmt_iterator *gsi)
1974 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1975 if (!loop_mask)
1976 return vec_mask;
1978 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1979 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1980 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1981 vec_mask, loop_mask);
1982 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1983 return and_res;
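/* For example (illustrative), when LOOP_MASK is present the helper above
   emits a single statement of the form

     vec_mask_and_N = vec_mask & loop_mask;

   before GSI and returns the new SSA name; without a loop mask it simply
   returns VEC_MASK unchanged.  */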
1986 /* Determine whether we can use a gather load or scatter store to vectorize
1987 strided load or store STMT by truncating the current offset to a smaller
1988 width. We need to be able to construct an offset vector:
1990 { 0, X, X*2, X*3, ... }
1992 without loss of precision, where X is STMT's DR_STEP.
1994 Return true if this is possible, describing the gather load or scatter
1995 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1997 static bool
1998 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1999 bool masked_p,
2000 gather_scatter_info *gs_info)
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2004 tree step = DR_STEP (dr);
2005 if (TREE_CODE (step) != INTEGER_CST)
2007 /* ??? Perhaps we could use range information here? */
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_NOTE, vect_location,
2010 "cannot truncate variable step.\n");
2011 return false;
2014 /* Get the number of bits in an element. */
2015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2016 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2017 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2019 /* Set COUNT to the upper limit on the number of elements - 1.
2020 Start with the maximum vectorization factor. */
2021 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2023 /* Try lowering COUNT to the number of scalar latch iterations. */
2024 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2025 widest_int max_iters;
2026 if (max_loop_iterations (loop, &max_iters)
2027 && max_iters < count)
2028 count = max_iters.to_shwi ();
2030 /* Try scales of 1 and the element size. */
2031 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2032 wi::overflow_type overflow = wi::OVF_NONE;
2033 for (int i = 0; i < 2; ++i)
2035 int scale = scales[i];
2036 widest_int factor;
2037 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2038 continue;
2040 /* See whether we can calculate COUNT * STEP / SCALE
2041 in ELEMENT_BITS bits. */
2042 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2043 if (overflow)
2044 continue;
2045 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2046 if (wi::min_precision (range, sign) > element_bits)
2048 overflow = wi::OVF_UNKNOWN;
2049 continue;
2052 /* See whether the target supports the operation. */
2053 tree memory_type = TREE_TYPE (DR_REF (dr));
2054 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2055 memory_type, element_bits, sign, scale,
2056 &gs_info->ifn, &gs_info->element_type))
2057 continue;
2059 tree offset_type = build_nonstandard_integer_type (element_bits,
2060 sign == UNSIGNED);
2062 gs_info->decl = NULL_TREE;
2063 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2064 but we don't need to store that here. */
2065 gs_info->base = NULL_TREE;
2066 gs_info->offset = fold_convert (offset_type, step);
2067 gs_info->offset_dt = vect_constant_def;
2068 gs_info->offset_vectype = NULL_TREE;
2069 gs_info->scale = scale;
2070 gs_info->memory_type = memory_type;
2071 return true;
2074 if (overflow && dump_enabled_p ())
2075 dump_printf_loc (MSG_NOTE, vect_location,
2076 "truncating gather/scatter offset to %d bits"
2077 " might change its value.\n", element_bits);
2079 return false;
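/* Worked example (illustrative assumption): for a strided access with
   32-bit elements and DR_STEP == 12, the loop above tries SCALE == 1 and
   then SCALE == 4; a scale is accepted if COUNT * (12 / SCALE) still fits
   in the 32-bit offset type and the target supports a gather/scatter with
   that scale, giving per-element offsets of 12 or 3 respectively (see
   vect_get_strided_load_store_ops below).  */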
2082 /* Return true if we can use gather/scatter internal functions to
2083 vectorize STMT, which is a grouped or strided load or store.
2084 MASKED_P is true if the load or store is conditional. When returning
2085 true, fill in GS_INFO with the information required to perform the
2086 operation. */
2088 static bool
2089 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
2090 bool masked_p,
2091 gather_scatter_info *gs_info)
2093 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2094 || gs_info->decl)
2095 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2096 masked_p, gs_info);
2098 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2099 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2100 tree offset_type = TREE_TYPE (gs_info->offset);
2101 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2103 /* Enforced by vect_check_gather_scatter. */
2104 gcc_assert (element_bits >= offset_bits);
2106 /* If the elements are wider than the offset, convert the offset to the
2107 same width, without changing its sign. */
2108 if (element_bits > offset_bits)
2110 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2111 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2112 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2115 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_NOTE, vect_location,
2117 "using gather/scatter for strided/grouped access,"
2118 " scale = %d\n", gs_info->scale);
2120 return true;
2123 /* STMT is a non-strided load or store, meaning that it accesses
2124 elements with a known constant step. Return -1 if that step
2125 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2127 static int
2128 compare_step_with_zero (gimple *stmt)
2130 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2131 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2132 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2133 size_zero_node);
2136 /* If the target supports a permute mask that reverses the elements in
2137 a vector of type VECTYPE, return that mask, otherwise return null. */
2139 static tree
2140 perm_mask_for_reverse (tree vectype)
2142 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2144 /* The encoding has a single stepped pattern. */
2145 vec_perm_builder sel (nunits, 1, 3);
2146 for (int i = 0; i < 3; ++i)
2147 sel.quick_push (nunits - 1 - i);
2149 vec_perm_indices indices (sel, 1, nunits);
2150 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2151 return NULL_TREE;
2152 return vect_gen_perm_mask_checked (vectype, indices);
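/* For example (illustrative): with a four-element vector the selector built
   above encodes { 3, 2, 1, 0 }, i.e. element I of the permuted vector is
   taken from element NUNITS - 1 - I of the input.  */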
2155 /* STMT is either a masked or unconditional store. Return the value
2156 being stored. */
2158 tree
2159 vect_get_store_rhs (gimple *stmt)
2161 if (gassign *assign = dyn_cast <gassign *> (stmt))
2163 gcc_assert (gimple_assign_single_p (assign));
2164 return gimple_assign_rhs1 (assign);
2166 if (gcall *call = dyn_cast <gcall *> (stmt))
2168 internal_fn ifn = gimple_call_internal_fn (call);
2169 int index = internal_fn_stored_value_index (ifn);
2170 gcc_assert (index >= 0);
2171 return gimple_call_arg (stmt, index);
2173 gcc_unreachable ();
2176 /* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT is part of a grouped load
2178 or store.
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2185 static bool
2186 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2187 bool masked_p, vec_load_store_type vls_type,
2188 vect_memory_access_type *memory_access_type,
2189 gather_scatter_info *gs_info)
2191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2192 vec_info *vinfo = stmt_info->vinfo;
2193 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2194 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2195 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
2196 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2197 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2198 bool single_element_p = (stmt == first_stmt
2199 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2200 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
2201 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2203 /* True if the vectorized statements would access beyond the last
2204 statement in the group. */
2205 bool overrun_p = false;
2207 /* True if we can cope with such overrun by peeling for gaps, so that
2208 there is at least one final scalar iteration after the vector loop. */
2209 bool can_overrun_p = (!masked_p
2210 && vls_type == VLS_LOAD
2211 && loop_vinfo
2212 && !loop->inner);
2214 /* There can only be a gap at the end of the group if the stride is
2215 known at compile time. */
2216 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2218 /* Stores can't yet have gaps. */
2219 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2221 if (slp)
2223 if (STMT_VINFO_STRIDED_P (stmt_info))
2225 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2226 separated by the stride, until we have a complete vector.
2227 Fall back to scalar accesses if that isn't possible. */
2228 if (multiple_p (nunits, group_size))
2229 *memory_access_type = VMAT_STRIDED_SLP;
2230 else
2231 *memory_access_type = VMAT_ELEMENTWISE;
2233 else
2235 overrun_p = loop_vinfo && gap != 0;
2236 if (overrun_p && vls_type != VLS_LOAD)
2238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2239 "Grouped store with gaps requires"
2240 " non-consecutive accesses\n");
2241 return false;
2243 /* An overrun is fine if the trailing elements are smaller
2244 than the alignment boundary B. Every vector access will
2245 be a multiple of B and so we are guaranteed to access a
2246 non-gap element in the same B-sized block. */
2247 if (overrun_p
2248 && gap < (vect_known_alignment_in_bytes (first_dr)
2249 / vect_get_scalar_dr_size (first_dr)))
2250 overrun_p = false;
2251 if (overrun_p && !can_overrun_p)
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "Peeling for outer loop is not supported\n");
2256 return false;
2258 *memory_access_type = VMAT_CONTIGUOUS;
2261 else
2263 /* We can always handle this case using elementwise accesses,
2264 but see if something more efficient is available. */
2265 *memory_access_type = VMAT_ELEMENTWISE;
2267 /* If there is a gap at the end of the group then these optimizations
2268 would access excess elements in the last iteration. */
2269 bool would_overrun_p = (gap != 0);
2270 /* An overrun is fine if the trailing elements are smaller than the
2271 alignment boundary B. Every vector access will be a multiple of B
2272 and so we are guaranteed to access a non-gap element in the
2273 same B-sized block. */
2274 if (would_overrun_p
2275 && !masked_p
2276 && gap < (vect_known_alignment_in_bytes (first_dr)
2277 / vect_get_scalar_dr_size (first_dr)))
2278 would_overrun_p = false;
2280 if (!STMT_VINFO_STRIDED_P (stmt_info)
2281 && (can_overrun_p || !would_overrun_p)
2282 && compare_step_with_zero (stmt) > 0)
2284 /* First cope with the degenerate case of a single-element
2285 vector. */
2286 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2287 *memory_access_type = VMAT_CONTIGUOUS;
2289 /* Otherwise try using LOAD/STORE_LANES. */
2290 if (*memory_access_type == VMAT_ELEMENTWISE
2291 && (vls_type == VLS_LOAD
2292 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2293 : vect_store_lanes_supported (vectype, group_size,
2294 masked_p)))
2296 *memory_access_type = VMAT_LOAD_STORE_LANES;
2297 overrun_p = would_overrun_p;
2300 /* If that fails, try using permuting loads. */
2301 if (*memory_access_type == VMAT_ELEMENTWISE
2302 && (vls_type == VLS_LOAD
2303 ? vect_grouped_load_supported (vectype, single_element_p,
2304 group_size)
2305 : vect_grouped_store_supported (vectype, group_size)))
2307 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2308 overrun_p = would_overrun_p;
2312 /* As a last resort, try using a gather load or scatter store.
2314 ??? Although the code can handle all group sizes correctly,
2315 it probably isn't a win to use separate strided accesses based
2316 on nearby locations. Or, even if it's a win over scalar code,
2317 it might not be a win over vectorizing at a lower VF, if that
2318 allows us to use contiguous accesses. */
2319 if (*memory_access_type == VMAT_ELEMENTWISE
2320 && single_element_p
2321 && loop_vinfo
2322 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2323 masked_p, gs_info))
2324 *memory_access_type = VMAT_GATHER_SCATTER;
2327 if (vls_type != VLS_LOAD && first_stmt == stmt)
2329 /* STMT is the leader of the group. Check the operands of all the
2330 stmts of the group. */
2331 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2332 while (next_stmt)
2334 tree op = vect_get_store_rhs (next_stmt);
2335 enum vect_def_type dt;
2336 if (!vect_is_simple_use (op, vinfo, &dt))
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2340 "use not simple.\n");
2341 return false;
2343 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2347 if (overrun_p)
2349 gcc_assert (can_overrun_p);
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "Data access with gaps requires scalar "
2353 "epilogue loop\n");
2354 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2357 return true;
2360 /* A subroutine of get_load_store_type, with a subset of the same
2361 arguments. Handle the case where STMT is a load or store that
2362 accesses consecutive elements with a negative step. */
2364 static vect_memory_access_type
2365 get_negative_load_store_type (gimple *stmt, tree vectype,
2366 vec_load_store_type vls_type,
2367 unsigned int ncopies)
2369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2370 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2371 dr_alignment_support alignment_support_scheme;
2373 if (ncopies > 1)
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "multiple types with negative step.\n");
2378 return VMAT_ELEMENTWISE;
2381 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2382 if (alignment_support_scheme != dr_aligned
2383 && alignment_support_scheme != dr_unaligned_supported)
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "negative step but alignment required.\n");
2388 return VMAT_ELEMENTWISE;
2391 if (vls_type == VLS_STORE_INVARIANT)
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_NOTE, vect_location,
2395 "negative step with invariant source;"
2396 " no permute needed.\n");
2397 return VMAT_CONTIGUOUS_DOWN;
2400 if (!perm_mask_for_reverse (vectype))
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "negative step and reversing not supported.\n");
2405 return VMAT_ELEMENTWISE;
2408 return VMAT_CONTIGUOUS_REVERSE;
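/* Note added for clarity: VMAT_CONTIGUOUS_REVERSE means the vectorized code
   accesses a full contiguous vector at the lowest address of the range and
   then reverses the element order using the mask from perm_mask_for_reverse,
   which is why that mask must be supported by the target before the value
   is returned above.  */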
2411 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2412 if there is a memory access type that the vectorized form can use,
2413 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2414 or scatters, fill in GS_INFO accordingly.
2416 SLP says whether we're performing SLP rather than loop vectorization.
2417 MASKED_P is true if the statement is conditional on a vectorized mask.
2418 VECTYPE is the vector type that the vectorized statements will use.
2419 NCOPIES is the number of vector statements that will be needed. */
2421 static bool
2422 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2423 vec_load_store_type vls_type, unsigned int ncopies,
2424 vect_memory_access_type *memory_access_type,
2425 gather_scatter_info *gs_info)
2427 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2428 vec_info *vinfo = stmt_info->vinfo;
2429 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2430 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2431 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2433 *memory_access_type = VMAT_GATHER_SCATTER;
2434 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2435 gcc_unreachable ();
2436 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2437 &gs_info->offset_dt,
2438 &gs_info->offset_vectype))
2440 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "%s index use not simple.\n",
2443 vls_type == VLS_LOAD ? "gather" : "scatter");
2444 return false;
2447 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2449 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2450 memory_access_type, gs_info))
2451 return false;
2453 else if (STMT_VINFO_STRIDED_P (stmt_info))
2455 gcc_assert (!slp);
2456 if (loop_vinfo
2457 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2458 masked_p, gs_info))
2459 *memory_access_type = VMAT_GATHER_SCATTER;
2460 else
2461 *memory_access_type = VMAT_ELEMENTWISE;
2463 else
2465 int cmp = compare_step_with_zero (stmt);
2466 if (cmp < 0)
2467 *memory_access_type = get_negative_load_store_type
2468 (stmt, vectype, vls_type, ncopies);
2469 else if (cmp == 0)
2471 gcc_assert (vls_type == VLS_LOAD);
2472 *memory_access_type = VMAT_INVARIANT;
2474 else
2475 *memory_access_type = VMAT_CONTIGUOUS;
2478 if ((*memory_access_type == VMAT_ELEMENTWISE
2479 || *memory_access_type == VMAT_STRIDED_SLP)
2480 && !nunits.is_constant ())
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "Not using elementwise accesses due to variable "
2485 "vectorization factor.\n");
2486 return false;
2489 /* FIXME: At the moment the cost model seems to underestimate the
2490 cost of using elementwise accesses. This check preserves the
2491 traditional behavior until that can be fixed. */
2492 if (*memory_access_type == VMAT_ELEMENTWISE
2493 && !STMT_VINFO_STRIDED_P (stmt_info)
2494 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2495 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2496 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "not falling back to elementwise accesses\n");
2501 return false;
2503 return true;
2506 /* Return true if boolean argument MASK is suitable for vectorizing
2507 conditional load or store STMT. When returning true, store the type
2508 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2509 in *MASK_VECTYPE_OUT. */
2511 static bool
2512 vect_check_load_store_mask (gimple *stmt, tree mask,
2513 vect_def_type *mask_dt_out,
2514 tree *mask_vectype_out)
2516 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2518 if (dump_enabled_p ())
2519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2520 "mask argument is not a boolean.\n");
2521 return false;
2524 if (TREE_CODE (mask) != SSA_NAME)
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask argument is not an SSA name.\n");
2529 return false;
2532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2533 enum vect_def_type mask_dt;
2534 tree mask_vectype;
2535 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2539 "mask use not simple.\n");
2540 return false;
2543 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2544 if (!mask_vectype)
2545 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2547 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "could not find an appropriate vector mask type.\n");
2552 return false;
2555 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2556 TYPE_VECTOR_SUBPARTS (vectype)))
2558 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "vector mask type ");
2562 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2563 dump_printf (MSG_MISSED_OPTIMIZATION,
2564 " does not match vector data type ");
2565 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2566 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2568 return false;
2571 *mask_dt_out = mask_dt;
2572 *mask_vectype_out = mask_vectype;
2573 return true;
2576 /* Return true if stored value RHS is suitable for vectorizing store
2577 statement STMT. When returning true, store the type of the
2578 definition in *RHS_DT_OUT, the type of the vectorized store value in
2579 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2581 static bool
2582 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2583 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2585 /* If this is a store from a constant, make sure
2586 native_encode_expr can handle it. */
2587 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "cannot encode constant as a byte sequence.\n");
2592 return false;
2595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2596 enum vect_def_type rhs_dt;
2597 tree rhs_vectype;
2598 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "use not simple.\n");
2603 return false;
2606 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2607 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "incompatible vector types.\n");
2612 return false;
2615 *rhs_dt_out = rhs_dt;
2616 *rhs_vectype_out = rhs_vectype;
2617 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2618 *vls_type_out = VLS_STORE_INVARIANT;
2619 else
2620 *vls_type_out = VLS_STORE;
2621 return true;
2624 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2625 Note that we support masks with floating-point type, in which case the
2626 floats are interpreted as a bitmask. */
2628 static tree
2629 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2631 if (TREE_CODE (masktype) == INTEGER_TYPE)
2632 return build_int_cst (masktype, -1);
2633 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2635 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2636 mask = build_vector_from_val (masktype, mask);
2637 return vect_init_vector (stmt, mask, masktype, NULL);
2639 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2641 REAL_VALUE_TYPE r;
2642 long tmp[6];
2643 for (int j = 0; j < 6; ++j)
2644 tmp[j] = -1;
2645 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2646 tree mask = build_real (TREE_TYPE (masktype), r);
2647 mask = build_vector_from_val (masktype, mask);
2648 return vect_init_vector (stmt, mask, masktype, NULL);
2650 gcc_unreachable ();
2653 /* Build an all-zero merge value of type VECTYPE while vectorizing
2654 STMT as a gather load. */
2656 static tree
2657 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2659 tree merge;
2660 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2661 merge = build_int_cst (TREE_TYPE (vectype), 0);
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2664 REAL_VALUE_TYPE r;
2665 long tmp[6];
2666 for (int j = 0; j < 6; ++j)
2667 tmp[j] = 0;
2668 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2669 merge = build_real (TREE_TYPE (vectype), r);
2671 else
2672 gcc_unreachable ();
2673 merge = build_vector_from_val (vectype, merge);
2674 return vect_init_vector (stmt, merge, vectype, NULL);
2677 /* Build a gather load call while vectorizing STMT. Insert new instructions
2678 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2679 operation. If the load is conditional, MASK is the unvectorized
2680 condition and MASK_DT is its definition type, otherwise MASK is null. */
2682 static void
2683 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2684 gimple **vec_stmt, gather_scatter_info *gs_info,
2685 tree mask, vect_def_type mask_dt)
2687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2688 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2689 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2691 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2692 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2693 edge pe = loop_preheader_edge (loop);
2694 enum { NARROW, NONE, WIDEN } modifier;
2695 poly_uint64 gather_off_nunits
2696 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2698 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2699 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2700 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2703 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2704 tree scaletype = TREE_VALUE (arglist);
2705 gcc_checking_assert (types_compatible_p (srctype, rettype)
2706 && (!mask || types_compatible_p (srctype, masktype)));
2708 tree perm_mask = NULL_TREE;
2709 tree mask_perm_mask = NULL_TREE;
2710 if (known_eq (nunits, gather_off_nunits))
2711 modifier = NONE;
2712 else if (known_eq (nunits * 2, gather_off_nunits))
2714 modifier = WIDEN;
2716 /* Currently widening gathers and scatters are only supported for
2717 fixed-length vectors. */
2718 int count = gather_off_nunits.to_constant ();
2719 vec_perm_builder sel (count, count, 1);
2720 for (int i = 0; i < count; ++i)
2721 sel.quick_push (i | (count / 2));
2723 vec_perm_indices indices (sel, 1, count);
2724 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2725 indices);
2727 else if (known_eq (nunits, gather_off_nunits * 2))
2729 modifier = NARROW;
2731 /* Currently narrowing gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count = nunits.to_constant ();
2734 vec_perm_builder sel (count, count, 1);
2735 sel.quick_grow (count);
2736 for (int i = 0; i < count; ++i)
2737 sel[i] = i < count / 2 ? i : i + count / 2;
2738 vec_perm_indices indices (sel, 2, count);
2739 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2741 ncopies *= 2;
2743 if (mask)
2745 for (int i = 0; i < count; ++i)
2746 sel[i] = i | (count / 2);
2747 indices.new_vector (sel, 2, count);
2748 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2751 else
2752 gcc_unreachable ();
2754 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2755 vectype);
2757 tree ptr = fold_convert (ptrtype, gs_info->base);
2758 if (!is_gimple_min_invariant (ptr))
2760 gimple_seq seq;
2761 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2762 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2763 gcc_assert (!new_bb);
2766 tree scale = build_int_cst (scaletype, gs_info->scale);
2768 tree vec_oprnd0 = NULL_TREE;
2769 tree vec_mask = NULL_TREE;
2770 tree src_op = NULL_TREE;
2771 tree mask_op = NULL_TREE;
2772 tree prev_res = NULL_TREE;
2773 stmt_vec_info prev_stmt_info = NULL;
2775 if (!mask)
2777 src_op = vect_build_zero_merge_argument (stmt, rettype);
2778 mask_op = vect_build_all_ones_mask (stmt, masktype);
2781 for (int j = 0; j < ncopies; ++j)
2783 tree op, var;
2784 gimple *new_stmt;
2785 if (modifier == WIDEN && (j & 1))
2786 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2787 perm_mask, stmt, gsi);
2788 else if (j == 0)
2789 op = vec_oprnd0
2790 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2791 else
2792 op = vec_oprnd0
2793 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2795 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2798 TYPE_VECTOR_SUBPARTS (idxtype)));
2799 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2800 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2801 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2803 op = var;
2806 if (mask)
2808 if (mask_perm_mask && (j & 1))
2809 mask_op = permute_vec_elements (mask_op, mask_op,
2810 mask_perm_mask, stmt, gsi);
2811 else
2813 if (j == 0)
2814 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2815 else
2816 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2818 mask_op = vec_mask;
2819 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2821 gcc_assert
2822 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2823 TYPE_VECTOR_SUBPARTS (masktype)));
2824 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2825 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2826 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2827 mask_op);
2828 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2829 mask_op = var;
2832 src_op = mask_op;
2835 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2836 mask_op, scale);
2838 if (!useless_type_conversion_p (vectype, rettype))
2840 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2841 TYPE_VECTOR_SUBPARTS (rettype)));
2842 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2843 gimple_call_set_lhs (new_stmt, op);
2844 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2845 var = make_ssa_name (vec_dest);
2846 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2847 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2849 else
2851 var = make_ssa_name (vec_dest, new_stmt);
2852 gimple_call_set_lhs (new_stmt, var);
2855 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2857 if (modifier == NARROW)
2859 if ((j & 1) == 0)
2861 prev_res = var;
2862 continue;
2864 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2865 new_stmt = SSA_NAME_DEF_STMT (var);
2868 if (prev_stmt_info == NULL)
2869 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2870 else
2871 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2872 prev_stmt_info = vinfo_for_stmt (new_stmt);
2876 /* Prepare the base and offset in GS_INFO for vectorization.
2877 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2878 to the vectorized offset argument for the first copy of STMT. STMT
2879 is the statement described by GS_INFO and LOOP is the containing loop. */
2881 static void
2882 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2883 gather_scatter_info *gs_info,
2884 tree *dataref_ptr, tree *vec_offset)
2886 gimple_seq stmts = NULL;
2887 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2888 if (stmts != NULL)
2890 basic_block new_bb;
2891 edge pe = loop_preheader_edge (loop);
2892 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2893 gcc_assert (!new_bb);
2895 tree offset_type = TREE_TYPE (gs_info->offset);
2896 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2897 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2898 offset_vectype);
2901 /* Prepare to implement a grouped or strided load or store using
2902 the gather load or scatter store operation described by GS_INFO.
2903 STMT is the load or store statement.
2905 Set *DATAREF_BUMP to the amount that should be added to the base
2906 address after each copy of the vectorized statement. Set *VEC_OFFSET
2907 to an invariant offset vector in which element I has the value
2908 I * DR_STEP / SCALE. */
2910 static void
2911 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2912 gather_scatter_info *gs_info,
2913 tree *dataref_bump, tree *vec_offset)
2915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2917 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2918 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2919 gimple_seq stmts;
2921 tree bump = size_binop (MULT_EXPR,
2922 fold_convert (sizetype, DR_STEP (dr)),
2923 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2924 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2925 if (stmts)
2926 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2928 /* The offset given in GS_INFO can have pointer type, so use the element
2929 type of the vector instead. */
2930 tree offset_type = TREE_TYPE (gs_info->offset);
2931 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2932 offset_type = TREE_TYPE (offset_vectype);
2934 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2935 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2936 ssize_int (gs_info->scale));
2937 step = fold_convert (offset_type, step);
2938 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2940 /* Create {0, X, X*2, X*3, ...}. */
2941 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2942 build_zero_cst (offset_type), step);
2943 if (stmts)
2944 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
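/* Worked example (illustrative): for DR_STEP == 8, SCALE == 4 and four
   elements per offset vector, the code above sets *DATAREF_BUMP to 32 and
   *VEC_OFFSET to the invariant series { 0, 2, 4, 6 }.  */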
2947 /* Return the amount that should be added to a vector pointer to move
2948 to the next or previous copy of AGGR_TYPE. DR is the data reference
2949 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2950 vectorization. */
2952 static tree
2953 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2954 vect_memory_access_type memory_access_type)
2956 if (memory_access_type == VMAT_INVARIANT)
2957 return size_zero_node;
2959 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2960 tree step = vect_dr_behavior (dr)->step;
2961 if (tree_int_cst_sgn (step) == -1)
2962 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2963 return iv_step;
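/* For example (illustrative): for a contiguous access with a 16-byte vector
   AGGR_TYPE the increment is 16, or -16 when the step is negative; for
   VMAT_INVARIANT it is 0, as returned above.  */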
2966 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2968 static bool
2969 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2970 gimple **vec_stmt, slp_tree slp_node,
2971 tree vectype_in, enum vect_def_type *dt,
2972 stmt_vector_for_cost *cost_vec)
2974 tree op, vectype;
2975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2977 unsigned ncopies;
2978 unsigned HOST_WIDE_INT nunits, num_bytes;
2980 op = gimple_call_arg (stmt, 0);
2981 vectype = STMT_VINFO_VECTYPE (stmt_info);
2983 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2984 return false;
2986 /* Multiple types in SLP are handled by creating the appropriate number of
2987 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2988 case of SLP. */
2989 if (slp_node)
2990 ncopies = 1;
2991 else
2992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2994 gcc_assert (ncopies >= 1);
2996 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2997 if (! char_vectype)
2998 return false;
3000 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3001 return false;
3003 unsigned word_bytes = num_bytes / nunits;
3005 /* The encoding uses one stepped pattern for each byte in the word. */
3006 vec_perm_builder elts (num_bytes, word_bytes, 3);
3007 for (unsigned i = 0; i < 3; ++i)
3008 for (unsigned j = 0; j < word_bytes; ++j)
3009 elts.quick_push ((i + 1) * word_bytes - j - 1);
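/* Illustrative note: for __builtin_bswap32 on a 16-byte vector
   (WORD_BYTES == 4) the selector built above starts
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 } and the stepped encoding extends
   it with { 15, 14, 13, 12 }, i.e. the bytes of each word are reversed in
   place.  */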
3011 vec_perm_indices indices (elts, 1, num_bytes);
3012 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3013 return false;
3015 if (! vec_stmt)
3017 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3018 DUMP_VECT_SCOPE ("vectorizable_bswap");
3019 if (! slp_node)
3021 record_stmt_cost (cost_vec,
3022 1, vector_stmt, stmt_info, 0, vect_prologue);
3023 record_stmt_cost (cost_vec,
3024 ncopies, vec_perm, stmt_info, 0, vect_body);
3026 return true;
3029 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3031 /* Transform. */
3032 vec<tree> vec_oprnds = vNULL;
3033 gimple *new_stmt = NULL;
3034 stmt_vec_info prev_stmt_info = NULL;
3035 for (unsigned j = 0; j < ncopies; j++)
3037 /* Handle uses. */
3038 if (j == 0)
3039 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3040 else
3041 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3043 /* Arguments are ready. Create the new vector stmt. */
3044 unsigned i;
3045 tree vop;
3046 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3048 tree tem = make_ssa_name (char_vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 char_vectype, vop));
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3052 tree tem2 = make_ssa_name (char_vectype);
3053 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3054 tem, tem, bswap_vconst);
3055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3056 tem = make_ssa_name (vectype);
3057 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3058 vectype, tem2));
3059 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3060 if (slp_node)
3061 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3064 if (slp_node)
3065 continue;
3067 if (j == 0)
3068 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3069 else
3070 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3072 prev_stmt_info = vinfo_for_stmt (new_stmt);
3075 vec_oprnds.release ();
3076 return true;
3079 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3080 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3081 in a single step. On success, store the binary pack code in
3082 *CONVERT_CODE. */
3084 static bool
3085 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3086 tree_code *convert_code)
3088 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3089 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3090 return false;
3092 tree_code code;
3093 int multi_step_cvt = 0;
3094 auto_vec <tree, 8> interm_types;
3095 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3096 &code, &multi_step_cvt,
3097 &interm_types)
3098 || multi_step_cvt)
3099 return false;
3101 *convert_code = code;
3102 return true;
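/* Illustrative example (assumption about the code chosen): narrowing V4SI
   inputs to a V8HI output in one step would typically use
   VEC_PACK_TRUNC_EXPR; that is the kind of tree code stored in *CONVERT_CODE
   when supportable_narrowing_operation succeeds with no intermediate
   types.  */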
3105 /* Function vectorizable_call.
3107 Check if GS performs a function call that can be vectorized.
3108 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3109 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3110 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3112 static bool
3113 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
3114 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
3116 gcall *stmt;
3117 tree vec_dest;
3118 tree scalar_dest;
3119 tree op;
3120 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3121 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3122 tree vectype_out, vectype_in;
3123 poly_uint64 nunits_in;
3124 poly_uint64 nunits_out;
3125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3126 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3127 vec_info *vinfo = stmt_info->vinfo;
3128 tree fndecl, new_temp, rhs_type;
3129 enum vect_def_type dt[4]
3130 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3131 vect_unknown_def_type };
3132 int ndts = ARRAY_SIZE (dt);
3133 gimple *new_stmt = NULL;
3134 int ncopies, j;
3135 auto_vec<tree, 8> vargs;
3136 auto_vec<tree, 8> orig_vargs;
3137 enum { NARROW, NONE, WIDEN } modifier;
3138 size_t i, nargs;
3139 tree lhs;
3141 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3142 return false;
3144 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3145 && ! vec_stmt)
3146 return false;
3148 /* Is GS a vectorizable call? */
3149 stmt = dyn_cast <gcall *> (gs);
3150 if (!stmt)
3151 return false;
3153 if (gimple_call_internal_p (stmt)
3154 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3155 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3156 /* Handled by vectorizable_load and vectorizable_store. */
3157 return false;
3159 if (gimple_call_lhs (stmt) == NULL_TREE
3160 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3161 return false;
3163 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3165 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3167 /* Process function arguments. */
3168 rhs_type = NULL_TREE;
3169 vectype_in = NULL_TREE;
3170 nargs = gimple_call_num_args (stmt);
3172 /* Bail out if the function has more than four arguments; we do not have
3173 interesting builtin functions to vectorize with more than two arguments
3174 except for fma. No arguments is also not good. */
3175 if (nargs == 0 || nargs > 4)
3176 return false;
3178 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3179 combined_fn cfn = gimple_call_combined_fn (stmt);
3180 if (cfn == CFN_GOMP_SIMD_LANE)
3182 nargs = 0;
3183 rhs_type = unsigned_type_node;
3186 int mask_opno = -1;
3187 if (internal_fn_p (cfn))
3188 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3190 for (i = 0; i < nargs; i++)
3192 tree opvectype;
3194 op = gimple_call_arg (stmt, i);
3195 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3197 if (dump_enabled_p ())
3198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3199 "use not simple.\n");
3200 return false;
3203 /* Skip the mask argument to an internal function. This operand
3204 has been converted via a pattern if necessary. */
3205 if ((int) i == mask_opno)
3206 continue;
3208 /* We can only handle calls with arguments of the same type. */
3209 if (rhs_type
3210 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3212 if (dump_enabled_p ())
3213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3214 "argument types differ.\n");
3215 return false;
3217 if (!rhs_type)
3218 rhs_type = TREE_TYPE (op);
3220 if (!vectype_in)
3221 vectype_in = opvectype;
3222 else if (opvectype
3223 && opvectype != vectype_in)
3225 if (dump_enabled_p ())
3226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3227 "argument vector types differ.\n");
3228 return false;
3231 /* If all arguments are external or constant defs use a vector type with
3232 the same size as the output vector type. */
3233 if (!vectype_in)
3234 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3235 if (vec_stmt)
3236 gcc_assert (vectype_in);
3237 if (!vectype_in)
3239 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "no vectype for scalar type ");
3243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3244 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3247 return false;
3250 /* FORNOW */
3251 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3252 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3253 if (known_eq (nunits_in * 2, nunits_out))
3254 modifier = NARROW;
3255 else if (known_eq (nunits_out, nunits_in))
3256 modifier = NONE;
3257 else if (known_eq (nunits_out * 2, nunits_in))
3258 modifier = WIDEN;
3259 else
3260 return false;
3262 /* We only handle functions that do not read or clobber memory. */
3263 if (gimple_vuse (stmt))
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3267 "function reads from or writes to memory.\n");
3268 return false;
3271 /* For now, we only vectorize functions if a target specific builtin
3272 is available. TODO -- in some cases, it might be profitable to
3273 insert the calls for pieces of the vector, in order to be able
3274 to vectorize other operations in the loop. */
3275 fndecl = NULL_TREE;
3276 internal_fn ifn = IFN_LAST;
3277 tree callee = gimple_call_fndecl (stmt);
3279 /* First try using an internal function. */
3280 tree_code convert_code = ERROR_MARK;
3281 if (cfn != CFN_LAST
3282 && (modifier == NONE
3283 || (modifier == NARROW
3284 && simple_integer_narrowing (vectype_out, vectype_in,
3285 &convert_code))))
3286 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3287 vectype_in);
3289 /* If that fails, try asking for a target-specific built-in function. */
3290 if (ifn == IFN_LAST)
3292 if (cfn != CFN_LAST)
3293 fndecl = targetm.vectorize.builtin_vectorized_function
3294 (cfn, vectype_out, vectype_in);
3295 else if (callee)
3296 fndecl = targetm.vectorize.builtin_md_vectorized_function
3297 (callee, vectype_out, vectype_in);
3300 if (ifn == IFN_LAST && !fndecl)
3302 if (cfn == CFN_GOMP_SIMD_LANE
3303 && !slp_node
3304 && loop_vinfo
3305 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3306 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3307 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3308 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3310 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3311 { 0, 1, 2, ... vf - 1 } vector. */
3312 gcc_assert (nargs == 0);
3314 else if (modifier == NONE
3315 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3316 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3317 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3318 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3319 vectype_in, dt, cost_vec);
3320 else
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3324 "function is not vectorizable.\n");
3325 return false;
3329 if (slp_node)
3330 ncopies = 1;
3331 else if (modifier == NARROW && ifn == IFN_LAST)
3332 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3333 else
3334 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3336 /* Sanity check: make sure that at least one copy of the vectorized stmt
3337 needs to be generated. */
3338 gcc_assert (ncopies >= 1);
3340 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
3341 if (!vec_stmt) /* transformation not required. */
3343 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3344 DUMP_VECT_SCOPE ("vectorizable_call");
3345 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3346 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3347 record_stmt_cost (cost_vec, ncopies / 2,
3348 vec_promote_demote, stmt_info, 0, vect_body);
3350 if (loop_vinfo && mask_opno >= 0)
3352 unsigned int nvectors = (slp_node
3353 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3354 : ncopies);
3355 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3357 return true;
3360 /* Transform. */
3362 if (dump_enabled_p ())
3363 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3365 /* Handle def. */
3366 scalar_dest = gimple_call_lhs (stmt);
3367 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3369 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3371 prev_stmt_info = NULL;
3372 if (modifier == NONE || ifn != IFN_LAST)
3374 tree prev_res = NULL_TREE;
3375 vargs.safe_grow (nargs);
3376 orig_vargs.safe_grow (nargs);
3377 for (j = 0; j < ncopies; ++j)
3379 /* Build argument list for the vectorized call. */
3380 if (slp_node)
3382 auto_vec<vec<tree> > vec_defs (nargs);
3383 vec<tree> vec_oprnds0;
3385 for (i = 0; i < nargs; i++)
3386 vargs[i] = gimple_call_arg (stmt, i);
3387 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3388 vec_oprnds0 = vec_defs[0];
3390 /* Arguments are ready. Create the new vector stmt. */
3391 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3393 size_t k;
3394 for (k = 0; k < nargs; k++)
3396 vec<tree> vec_oprndsk = vec_defs[k];
3397 vargs[k] = vec_oprndsk[i];
3399 if (modifier == NARROW)
3401 /* We don't define any narrowing conditional functions
3402 at present. */
3403 gcc_assert (mask_opno < 0);
3404 tree half_res = make_ssa_name (vectype_in);
3405 gcall *call
3406 = gimple_build_call_internal_vec (ifn, vargs);
3407 gimple_call_set_lhs (call, half_res);
3408 gimple_call_set_nothrow (call, true);
3409 new_stmt = call;
3410 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3411 if ((i & 1) == 0)
3413 prev_res = half_res;
3414 continue;
3416 new_temp = make_ssa_name (vec_dest);
3417 new_stmt = gimple_build_assign (new_temp, convert_code,
3418 prev_res, half_res);
3420 else
3422 if (mask_opno >= 0 && masked_loop_p)
3424 unsigned int vec_num = vec_oprnds0.length ();
3425 /* Always true for SLP. */
3426 gcc_assert (ncopies == 1);
3427 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3428 vectype_out, i);
3429 vargs[mask_opno] = prepare_load_store_mask
3430 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3433 gcall *call;
3434 if (ifn != IFN_LAST)
3435 call = gimple_build_call_internal_vec (ifn, vargs);
3436 else
3437 call = gimple_build_call_vec (fndecl, vargs);
3438 new_temp = make_ssa_name (vec_dest, call);
3439 gimple_call_set_lhs (call, new_temp);
3440 gimple_call_set_nothrow (call, true);
3441 new_stmt = call;
3443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3444 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3447 for (i = 0; i < nargs; i++)
3449 vec<tree> vec_oprndsi = vec_defs[i];
3450 vec_oprndsi.release ();
3452 continue;
3455 for (i = 0; i < nargs; i++)
3457 op = gimple_call_arg (stmt, i);
3458 if (j == 0)
3459 vec_oprnd0
3460 = vect_get_vec_def_for_operand (op, stmt);
3461 else
3462 vec_oprnd0
3463 = vect_get_vec_def_for_stmt_copy (dt[i], orig_vargs[i]);
3465 orig_vargs[i] = vargs[i] = vec_oprnd0;
3468 if (mask_opno >= 0 && masked_loop_p)
3470 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3471 vectype_out, j);
3472 vargs[mask_opno]
3473 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3474 vargs[mask_opno], gsi);
3477 if (cfn == CFN_GOMP_SIMD_LANE)
3479 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3480 tree new_var
3481 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3482 gimple *init_stmt = gimple_build_assign (new_var, cst);
3483 vect_init_vector_1 (stmt, init_stmt, NULL);
3484 new_temp = make_ssa_name (vec_dest);
3485 new_stmt = gimple_build_assign (new_temp, new_var);
3487 else if (modifier == NARROW)
3489 /* We don't define any narrowing conditional functions at
3490 present. */
3491 gcc_assert (mask_opno < 0);
3492 tree half_res = make_ssa_name (vectype_in);
3493 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3494 gimple_call_set_lhs (call, half_res);
3495 gimple_call_set_nothrow (call, true);
3496 new_stmt = call;
3497 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3498 if ((j & 1) == 0)
3500 prev_res = half_res;
3501 continue;
3503 new_temp = make_ssa_name (vec_dest);
3504 new_stmt = gimple_build_assign (new_temp, convert_code,
3505 prev_res, half_res);
3507 else
3509 gcall *call;
3510 if (ifn != IFN_LAST)
3511 call = gimple_build_call_internal_vec (ifn, vargs);
3512 else
3513 call = gimple_build_call_vec (fndecl, vargs);
3514 new_temp = make_ssa_name (vec_dest, new_stmt);
3515 gimple_call_set_lhs (call, new_temp);
3516 gimple_call_set_nothrow (call, true);
3517 new_stmt = call;
3519 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3521 if (j == (modifier == NARROW ? 1 : 0))
3522 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3523 else
3524 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3526 prev_stmt_info = vinfo_for_stmt (new_stmt);
3529 else if (modifier == NARROW)
3531 /* We don't define any narrowing conditional functions at present. */
3532 gcc_assert (mask_opno < 0);
3533 for (j = 0; j < ncopies; ++j)
3535 /* Build argument list for the vectorized call. */
3536 if (j == 0)
3537 vargs.create (nargs * 2);
3538 else
3539 vargs.truncate (0);
3541 if (slp_node)
3543 auto_vec<vec<tree> > vec_defs (nargs);
3544 vec<tree> vec_oprnds0;
3546 for (i = 0; i < nargs; i++)
3547 vargs.quick_push (gimple_call_arg (stmt, i));
3548 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3549 vec_oprnds0 = vec_defs[0];
3551 /* Arguments are ready. Create the new vector stmt. */
3552 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3554 size_t k;
3555 vargs.truncate (0);
3556 for (k = 0; k < nargs; k++)
3558 vec<tree> vec_oprndsk = vec_defs[k];
3559 vargs.quick_push (vec_oprndsk[i]);
3560 vargs.quick_push (vec_oprndsk[i + 1]);
3562 gcall *call;
3563 if (ifn != IFN_LAST)
3564 call = gimple_build_call_internal_vec (ifn, vargs);
3565 else
3566 call = gimple_build_call_vec (fndecl, vargs);
3567 new_temp = make_ssa_name (vec_dest, call);
3568 gimple_call_set_lhs (call, new_temp);
3569 gimple_call_set_nothrow (call, true);
3570 new_stmt = call;
3571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3572 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3575 for (i = 0; i < nargs; i++)
3577 vec<tree> vec_oprndsi = vec_defs[i];
3578 vec_oprndsi.release ();
3580 continue;
3583 for (i = 0; i < nargs; i++)
3585 op = gimple_call_arg (stmt, i);
3586 if (j == 0)
3588 vec_oprnd0
3589 = vect_get_vec_def_for_operand (op, stmt);
3590 vec_oprnd1
3591 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3593 else
3595 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3596 vec_oprnd0
3597 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3598 vec_oprnd1
3599 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3602 vargs.quick_push (vec_oprnd0);
3603 vargs.quick_push (vec_oprnd1);
3606 new_stmt = gimple_build_call_vec (fndecl, vargs);
3607 new_temp = make_ssa_name (vec_dest, new_stmt);
3608 gimple_call_set_lhs (new_stmt, new_temp);
3609 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3611 if (j == 0)
3612 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3613 else
3614 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3616 prev_stmt_info = vinfo_for_stmt (new_stmt);
3619 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3621 else
3622 /* No current target implements this case. */
3623 return false;
3625 vargs.release ();
3627 /* The call in STMT might prevent it from being removed in dce.
3628 We however cannot remove it here, due to the way the ssa name
3629 it defines is mapped to the new definition. So just replace
3630 the rhs of the statement with something harmless. */
3632 if (slp_node)
3633 return true;
3635 if (is_pattern_stmt_p (stmt_info))
3636 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3637 lhs = gimple_get_lhs (stmt_info->stmt);
3639 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3640 set_vinfo_for_stmt (new_stmt, stmt_info);
3641 set_vinfo_for_stmt (stmt_info->stmt, NULL);
3642 STMT_VINFO_STMT (stmt_info) = new_stmt;
3643 gsi_replace (gsi, new_stmt, false);
3645 return true;
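/* Per-argument information collected while analyzing a call for replacement
   by a SIMD clone: the argument's def-type and vector type (if any), its
   base value OP, its linear step when the argument is linear, the known
   pointer alignment, and whether it is linear only within a single simd
   lane rather than across the whole loop. */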
3649 struct simd_call_arg_info
3651 tree vectype;
3652 tree op;
3653 HOST_WIDE_INT linear_step;
3654 enum vect_def_type dt;
3655 unsigned int align;
3656 bool simd_lane_linear;
3659 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3660 is linear within simd lane (but not within whole loop), note it in
3661 *ARGINFO. */
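/* For example (the GIMPLE is only a rough sketch and the names are made up),
   in a simd loop with simduid SIMDUID:

     _1 = .GOMP_SIMD_LANE (simduid_5);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &buf + _3;

   P_4 advances by 4 bytes per simd lane but restarts in every scalar
   iteration, so the walk below records base &buf, linear_step 4 and
   simd_lane_linear = true in *ARGINFO. */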
3663 static void
3664 vect_simd_lane_linear (tree op, struct loop *loop,
3665 struct simd_call_arg_info *arginfo)
3667 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3669 if (!is_gimple_assign (def_stmt)
3670 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3671 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3672 return;
3674 tree base = gimple_assign_rhs1 (def_stmt);
3675 HOST_WIDE_INT linear_step = 0;
3676 tree v = gimple_assign_rhs2 (def_stmt);
3677 while (TREE_CODE (v) == SSA_NAME)
3679 tree t;
3680 def_stmt = SSA_NAME_DEF_STMT (v);
3681 if (is_gimple_assign (def_stmt))
3682 switch (gimple_assign_rhs_code (def_stmt))
3684 case PLUS_EXPR:
3685 t = gimple_assign_rhs2 (def_stmt);
3686 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3687 return;
3688 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3689 v = gimple_assign_rhs1 (def_stmt);
3690 continue;
3691 case MULT_EXPR:
3692 t = gimple_assign_rhs2 (def_stmt);
3693 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3694 return;
3695 linear_step = tree_to_shwi (t);
3696 v = gimple_assign_rhs1 (def_stmt);
3697 continue;
3698 CASE_CONVERT:
3699 t = gimple_assign_rhs1 (def_stmt);
3700 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3701 || (TYPE_PRECISION (TREE_TYPE (v))
3702 < TYPE_PRECISION (TREE_TYPE (t))))
3703 return;
3704 if (!linear_step)
3705 linear_step = 1;
3706 v = t;
3707 continue;
3708 default:
3709 return;
3711 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3712 && loop->simduid
3713 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3714 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3715 == loop->simduid))
3717 if (!linear_step)
3718 linear_step = 1;
3719 arginfo->linear_step = linear_step;
3720 arginfo->op = base;
3721 arginfo->simd_lane_linear = true;
3722 return;
3727 /* Return the number of elements in vector type VECTYPE, which is associated
3728 with a SIMD clone. At present these vectors always have a constant
3729 length. */
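/* For example, for a V4SF vector type this returns 4. */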
3731 static unsigned HOST_WIDE_INT
3732 simd_clone_subparts (tree vectype)
3734 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3737 /* Function vectorizable_simd_clone_call.
3739 Check if STMT performs a function call that can be vectorized
3740 by calling a simd clone of the function.
3741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
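/* For example (a sketch only; 'foo' and the arrays are made-up names), given

     #pragma omp declare simd simdlen(4) notinbranch
     float foo (float x);

     for (i = 0; i < n; i++)
       a[i] = foo (b[i]);

   the scalar call is replaced by calls to the 4-lane vector variant of foo
   (on x86 typically mangled along the lines of _ZGVbN4v_foo), each taking a
   V4SF argument and producing a V4SF result; which clone is used is decided
   by the badness computation below. */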
3745 static bool
3746 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3747 gimple **vec_stmt, slp_tree slp_node,
3748 stmt_vector_for_cost *)
3750 tree vec_dest;
3751 tree scalar_dest;
3752 tree op, type;
3753 tree vec_oprnd0 = NULL_TREE;
3754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3755 tree vectype;
3756 unsigned int nunits;
3757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3759 vec_info *vinfo = stmt_info->vinfo;
3760 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3761 tree fndecl, new_temp;
3762 gimple *new_stmt = NULL;
3763 int ncopies, j;
3764 auto_vec<simd_call_arg_info> arginfo;
3765 vec<tree> vargs = vNULL;
3766 size_t i, nargs;
3767 tree lhs, rtype, ratype;
3768 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3770 /* Is STMT a vectorizable call? */
3771 if (!is_gimple_call (stmt))
3772 return false;
3774 fndecl = gimple_call_fndecl (stmt);
3775 if (fndecl == NULL_TREE)
3776 return false;
3778 struct cgraph_node *node = cgraph_node::get (fndecl);
3779 if (node == NULL || node->simd_clones == NULL)
3780 return false;
3782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3783 return false;
3785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3786 && ! vec_stmt)
3787 return false;
3789 if (gimple_call_lhs (stmt)
3790 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3791 return false;
3793 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3795 vectype = STMT_VINFO_VECTYPE (stmt_info);
3797 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3798 return false;
3800 /* FORNOW */
3801 if (slp_node)
3802 return false;
3804 /* Process function arguments. */
3805 nargs = gimple_call_num_args (stmt);
3807 /* Bail out if the function has zero arguments. */
3808 if (nargs == 0)
3809 return false;
3811 arginfo.reserve (nargs, true);
3813 for (i = 0; i < nargs; i++)
3815 simd_call_arg_info thisarginfo;
3816 affine_iv iv;
3818 thisarginfo.linear_step = 0;
3819 thisarginfo.align = 0;
3820 thisarginfo.op = NULL_TREE;
3821 thisarginfo.simd_lane_linear = false;
3823 op = gimple_call_arg (stmt, i);
3824 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3825 &thisarginfo.vectype)
3826 || thisarginfo.dt == vect_uninitialized_def)
3828 if (dump_enabled_p ())
3829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3830 "use not simple.\n");
3831 return false;
3834 if (thisarginfo.dt == vect_constant_def
3835 || thisarginfo.dt == vect_external_def)
3836 gcc_assert (thisarginfo.vectype == NULL_TREE);
3837 else
3838 gcc_assert (thisarginfo.vectype != NULL_TREE);
3840 /* For linear arguments, the analysis phase should have saved
3841 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
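/* As used here and when the record is filled in during analysis further
   below, STMT_VINFO_SIMD_CLONE_INFO apparently holds the chosen clone's
   decl in slot 0 and, for each linear argument I, its base in slot
   I*3 + 1, its step in slot I*3 + 2, and in slot I*3 + 3 a boolean that
   is true when the argument is linear within a simd lane only. */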
3842 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3843 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3845 gcc_assert (vec_stmt);
3846 thisarginfo.linear_step
3847 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3848 thisarginfo.op
3849 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3850 thisarginfo.simd_lane_linear
3851 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3852 == boolean_true_node);
3853 /* If loop has been peeled for alignment, we need to adjust it. */
3854 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3855 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3856 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3858 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3859 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3860 tree opt = TREE_TYPE (thisarginfo.op);
3861 bias = fold_convert (TREE_TYPE (step), bias);
3862 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3863 thisarginfo.op
3864 = fold_build2 (POINTER_TYPE_P (opt)
3865 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3866 thisarginfo.op, bias);
3869 else if (!vec_stmt
3870 && thisarginfo.dt != vect_constant_def
3871 && thisarginfo.dt != vect_external_def
3872 && loop_vinfo
3873 && TREE_CODE (op) == SSA_NAME
3874 && simple_iv (loop, loop_containing_stmt (stmt), op,
3875 &iv, false)
3876 && tree_fits_shwi_p (iv.step))
3878 thisarginfo.linear_step = tree_to_shwi (iv.step);
3879 thisarginfo.op = iv.base;
3881 else if ((thisarginfo.dt == vect_constant_def
3882 || thisarginfo.dt == vect_external_def)
3883 && POINTER_TYPE_P (TREE_TYPE (op)))
3884 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3885 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3886 linear too. */
3887 if (POINTER_TYPE_P (TREE_TYPE (op))
3888 && !thisarginfo.linear_step
3889 && !vec_stmt
3890 && thisarginfo.dt != vect_constant_def
3891 && thisarginfo.dt != vect_external_def
3892 && loop_vinfo
3893 && !slp_node
3894 && TREE_CODE (op) == SSA_NAME)
3895 vect_simd_lane_linear (op, loop, &thisarginfo);
3897 arginfo.quick_push (thisarginfo);
3900 unsigned HOST_WIDE_INT vf;
3901 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "not considering SIMD clones; not yet supported"
3906 " for variable-width vectors.\n");
3907 return false;
3910 unsigned int badness = 0;
3911 struct cgraph_node *bestn = NULL;
3912 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3913 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3914 else
3915 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3916 n = n->simdclone->next_clone)
3918 unsigned int this_badness = 0;
3919 if (n->simdclone->simdlen > vf
3920 || n->simdclone->nargs != nargs)
3921 continue;
3922 if (n->simdclone->simdlen < vf)
3923 this_badness += (exact_log2 (vf)
3924 - exact_log2 (n->simdclone->simdlen)) * 1024;
3925 if (n->simdclone->inbranch)
3926 this_badness += 2048;
3927 int target_badness = targetm.simd_clone.usable (n);
3928 if (target_badness < 0)
3929 continue;
3930 this_badness += target_badness * 512;
3931 /* FORNOW: Have to add code to add the mask argument. */
3932 if (n->simdclone->inbranch)
3933 continue;
3934 for (i = 0; i < nargs; i++)
3936 switch (n->simdclone->args[i].arg_type)
3938 case SIMD_CLONE_ARG_TYPE_VECTOR:
3939 if (!useless_type_conversion_p
3940 (n->simdclone->args[i].orig_type,
3941 TREE_TYPE (gimple_call_arg (stmt, i))))
3942 i = -1;
3943 else if (arginfo[i].dt == vect_constant_def
3944 || arginfo[i].dt == vect_external_def
3945 || arginfo[i].linear_step)
3946 this_badness += 64;
3947 break;
3948 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3949 if (arginfo[i].dt != vect_constant_def
3950 && arginfo[i].dt != vect_external_def)
3951 i = -1;
3952 break;
3953 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3954 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3955 if (arginfo[i].dt == vect_constant_def
3956 || arginfo[i].dt == vect_external_def
3957 || (arginfo[i].linear_step
3958 != n->simdclone->args[i].linear_step))
3959 i = -1;
3960 break;
3961 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3962 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3963 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3964 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3965 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3966 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3967 /* FORNOW */
3968 i = -1;
3969 break;
3970 case SIMD_CLONE_ARG_TYPE_MASK:
3971 gcc_unreachable ();
3973 if (i == (size_t) -1)
3974 break;
3975 if (n->simdclone->args[i].alignment > arginfo[i].align)
3977 i = -1;
3978 break;
3980 if (arginfo[i].align)
3981 this_badness += (exact_log2 (arginfo[i].align)
3982 - exact_log2 (n->simdclone->args[i].alignment));
3984 if (i == (size_t) -1)
3985 continue;
3986 if (bestn == NULL || this_badness < badness)
3988 bestn = n;
3989 badness = this_badness;
3993 if (bestn == NULL)
3994 return false;
3996 for (i = 0; i < nargs; i++)
3997 if ((arginfo[i].dt == vect_constant_def
3998 || arginfo[i].dt == vect_external_def)
3999 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4001 arginfo[i].vectype
4002 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4003 i)));
4004 if (arginfo[i].vectype == NULL
4005 || (simd_clone_subparts (arginfo[i].vectype)
4006 > bestn->simdclone->simdlen))
4007 return false;
4010 fndecl = bestn->decl;
4011 nunits = bestn->simdclone->simdlen;
4012 ncopies = vf / nunits;
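  /* For example, with a vectorization factor of 8 and a chosen clone of
     simdlen 4, ncopies is 2 and two calls to the clone are emitted per
     scalar call, each covering four lanes. */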
4014 /* If the function isn't const, only allow it in simd loops where user
4015 has asserted that at least nunits consecutive iterations can be
4016 performed using SIMD instructions. */
4017 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4018 && gimple_vuse (stmt))
4019 return false;
4021 /* Sanity check: make sure that at least one copy of the vectorized stmt
4022 needs to be generated. */
4023 gcc_assert (ncopies >= 1);
4025 if (!vec_stmt) /* transformation not required. */
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4028 for (i = 0; i < nargs; i++)
4029 if ((bestn->simdclone->args[i].arg_type
4030 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4031 || (bestn->simdclone->args[i].arg_type
4032 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4034 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4035 + 1);
4036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4037 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4038 ? size_type_node : TREE_TYPE (arginfo[i].op);
4039 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4040 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4041 tree sll = arginfo[i].simd_lane_linear
4042 ? boolean_true_node : boolean_false_node;
4043 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4045 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4046 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4047 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4048 return true;
4051 /* Transform. */
4053 if (dump_enabled_p ())
4054 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4056 /* Handle def. */
4057 scalar_dest = gimple_call_lhs (stmt);
4058 vec_dest = NULL_TREE;
4059 rtype = NULL_TREE;
4060 ratype = NULL_TREE;
4061 if (scalar_dest)
4063 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4064 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4065 if (TREE_CODE (rtype) == ARRAY_TYPE)
4067 ratype = rtype;
4068 rtype = TREE_TYPE (ratype);
4072 prev_stmt_info = NULL;
4073 for (j = 0; j < ncopies; ++j)
4075 /* Build argument list for the vectorized call. */
4076 if (j == 0)
4077 vargs.create (nargs);
4078 else
4079 vargs.truncate (0);
4081 for (i = 0; i < nargs; i++)
4083 unsigned int k, l, m, o;
4084 tree atype;
4085 op = gimple_call_arg (stmt, i);
4086 switch (bestn->simdclone->args[i].arg_type)
4088 case SIMD_CLONE_ARG_TYPE_VECTOR:
4089 atype = bestn->simdclone->args[i].vector_type;
4090 o = nunits / simd_clone_subparts (atype);
4091 for (m = j * o; m < (j + 1) * o; m++)
4093 if (simd_clone_subparts (atype)
4094 < simd_clone_subparts (arginfo[i].vectype))
4096 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4097 k = (simd_clone_subparts (arginfo[i].vectype)
4098 / simd_clone_subparts (atype));
4099 gcc_assert ((k & (k - 1)) == 0);
4100 if (m == 0)
4101 vec_oprnd0
4102 = vect_get_vec_def_for_operand (op, stmt);
4103 else
4105 vec_oprnd0 = arginfo[i].op;
4106 if ((m & (k - 1)) == 0)
4107 vec_oprnd0
4108 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4109 vec_oprnd0);
4111 arginfo[i].op = vec_oprnd0;
4112 vec_oprnd0
4113 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4114 bitsize_int (prec),
4115 bitsize_int ((m & (k - 1)) * prec));
4116 new_stmt
4117 = gimple_build_assign (make_ssa_name (atype),
4118 vec_oprnd0);
4119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4120 vargs.safe_push (gimple_assign_lhs (new_stmt));
4122 else
4124 k = (simd_clone_subparts (atype)
4125 / simd_clone_subparts (arginfo[i].vectype));
4126 gcc_assert ((k & (k - 1)) == 0);
4127 vec<constructor_elt, va_gc> *ctor_elts;
4128 if (k != 1)
4129 vec_alloc (ctor_elts, k);
4130 else
4131 ctor_elts = NULL;
4132 for (l = 0; l < k; l++)
4134 if (m == 0 && l == 0)
4135 vec_oprnd0
4136 = vect_get_vec_def_for_operand (op, stmt);
4137 else
4138 vec_oprnd0
4139 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4140 arginfo[i].op);
4141 arginfo[i].op = vec_oprnd0;
4142 if (k == 1)
4143 break;
4144 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4145 vec_oprnd0);
4147 if (k == 1)
4148 vargs.safe_push (vec_oprnd0);
4149 else
4151 vec_oprnd0 = build_constructor (atype, ctor_elts);
4152 new_stmt
4153 = gimple_build_assign (make_ssa_name (atype),
4154 vec_oprnd0);
4155 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156 vargs.safe_push (gimple_assign_lhs (new_stmt));
4160 break;
4161 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4162 vargs.safe_push (op);
4163 break;
4164 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4165 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4166 if (j == 0)
4168 gimple_seq stmts;
4169 arginfo[i].op
4170 = force_gimple_operand (arginfo[i].op, &stmts, true,
4171 NULL_TREE);
4172 if (stmts != NULL)
4174 basic_block new_bb;
4175 edge pe = loop_preheader_edge (loop);
4176 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4177 gcc_assert (!new_bb);
4179 if (arginfo[i].simd_lane_linear)
4181 vargs.safe_push (arginfo[i].op);
4182 break;
4184 tree phi_res = copy_ssa_name (op);
4185 gphi *new_phi = create_phi_node (phi_res, loop->header);
4186 set_vinfo_for_stmt (new_phi,
4187 new_stmt_vec_info (new_phi, loop_vinfo));
4188 add_phi_arg (new_phi, arginfo[i].op,
4189 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4190 enum tree_code code
4191 = POINTER_TYPE_P (TREE_TYPE (op))
4192 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4193 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4194 ? sizetype : TREE_TYPE (op);
4195 widest_int cst
4196 = wi::mul (bestn->simdclone->args[i].linear_step,
4197 ncopies * nunits);
4198 tree tcst = wide_int_to_tree (type, cst);
4199 tree phi_arg = copy_ssa_name (op);
4200 new_stmt
4201 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4202 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4203 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4204 set_vinfo_for_stmt (new_stmt,
4205 new_stmt_vec_info (new_stmt, loop_vinfo));
4206 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4207 UNKNOWN_LOCATION);
4208 arginfo[i].op = phi_res;
4209 vargs.safe_push (phi_res);
4211 else
4213 enum tree_code code
4214 = POINTER_TYPE_P (TREE_TYPE (op))
4215 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4216 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4217 ? sizetype : TREE_TYPE (op);
4218 widest_int cst
4219 = wi::mul (bestn->simdclone->args[i].linear_step,
4220 j * nunits);
4221 tree tcst = wide_int_to_tree (type, cst);
4222 new_temp = make_ssa_name (TREE_TYPE (op));
4223 new_stmt = gimple_build_assign (new_temp, code,
4224 arginfo[i].op, tcst);
4225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4226 vargs.safe_push (new_temp);
4228 break;
4229 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4230 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4231 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4232 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4233 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4234 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4235 default:
4236 gcc_unreachable ();
4240 new_stmt = gimple_build_call_vec (fndecl, vargs);
4241 if (vec_dest)
4243 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4244 if (ratype)
4245 new_temp = create_tmp_var (ratype);
4246 else if (simd_clone_subparts (vectype)
4247 == simd_clone_subparts (rtype))
4248 new_temp = make_ssa_name (vec_dest, new_stmt);
4249 else
4250 new_temp = make_ssa_name (rtype, new_stmt);
4251 gimple_call_set_lhs (new_stmt, new_temp);
4253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4255 if (vec_dest)
4257 if (simd_clone_subparts (vectype) < nunits)
4259 unsigned int k, l;
4260 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4261 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4262 k = nunits / simd_clone_subparts (vectype);
4263 gcc_assert ((k & (k - 1)) == 0);
4264 for (l = 0; l < k; l++)
4266 tree t;
4267 if (ratype)
4269 t = build_fold_addr_expr (new_temp);
4270 t = build2 (MEM_REF, vectype, t,
4271 build_int_cst (TREE_TYPE (t), l * bytes));
4273 else
4274 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4275 bitsize_int (prec), bitsize_int (l * prec));
4276 new_stmt
4277 = gimple_build_assign (make_ssa_name (vectype), t);
4278 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4279 if (j == 0 && l == 0)
4280 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4281 else
4282 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4284 prev_stmt_info = vinfo_for_stmt (new_stmt);
4287 if (ratype)
4288 vect_clobber_variable (stmt, gsi, new_temp);
4289 continue;
4291 else if (simd_clone_subparts (vectype) > nunits)
4293 unsigned int k = (simd_clone_subparts (vectype)
4294 / simd_clone_subparts (rtype));
4295 gcc_assert ((k & (k - 1)) == 0);
4296 if ((j & (k - 1)) == 0)
4297 vec_alloc (ret_ctor_elts, k);
4298 if (ratype)
4300 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4301 for (m = 0; m < o; m++)
4303 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4304 size_int (m), NULL_TREE, NULL_TREE);
4305 new_stmt
4306 = gimple_build_assign (make_ssa_name (rtype), tem);
4307 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4308 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4309 gimple_assign_lhs (new_stmt));
4311 vect_clobber_variable (stmt, gsi, new_temp);
4313 else
4314 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4315 if ((j & (k - 1)) != k - 1)
4316 continue;
4317 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4318 new_stmt
4319 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4320 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4322 if ((unsigned) j == k - 1)
4323 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4324 else
4325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4327 prev_stmt_info = vinfo_for_stmt (new_stmt);
4328 continue;
4330 else if (ratype)
4332 tree t = build_fold_addr_expr (new_temp);
4333 t = build2 (MEM_REF, vectype, t,
4334 build_int_cst (TREE_TYPE (t), 0));
4335 new_stmt
4336 = gimple_build_assign (make_ssa_name (vec_dest), t);
4337 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4338 vect_clobber_variable (stmt, gsi, new_temp);
4342 if (j == 0)
4343 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4344 else
4345 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4347 prev_stmt_info = vinfo_for_stmt (new_stmt);
4350 vargs.release ();
4352 /* The call in STMT might prevent it from being removed in dce.
4353 We however cannot remove it here, due to the way the ssa name
4354 it defines is mapped to the new definition. So just replace
4355 the rhs of the statement with something harmless. */
4357 if (slp_node)
4358 return true;
4360 if (scalar_dest)
4362 type = TREE_TYPE (scalar_dest);
4363 if (is_pattern_stmt_p (stmt_info))
4364 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4365 else
4366 lhs = gimple_call_lhs (stmt);
4367 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4369 else
4370 new_stmt = gimple_build_nop ();
4371 set_vinfo_for_stmt (new_stmt, stmt_info);
4372 set_vinfo_for_stmt (stmt, NULL);
4373 STMT_VINFO_STMT (stmt_info) = new_stmt;
4374 gsi_replace (gsi, new_stmt, true);
4375 unlink_stmt_vdef (stmt);
4377 return true;
4381 /* Function vect_gen_widened_results_half
4383 Create a vector stmt whose code, number of operands, and result
4384 variable are CODE, OP_TYPE and VEC_DEST, and whose arguments are
4385 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4386 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4387 needs to be created (DECL is a function-decl of a target-builtin).
4388 STMT is the original scalar stmt that we are vectorizing. */
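/* For example, when a V8HI multiplication is widened to V4SI results, this
   helper is invoked twice per pair of operands, once with something like
   VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, each call
   producing one V4SI half of the widened product. */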
4390 static gimple *
4391 vect_gen_widened_results_half (enum tree_code code,
4392 tree decl,
4393 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4394 tree vec_dest, gimple_stmt_iterator *gsi,
4395 gimple *stmt)
4397 gimple *new_stmt;
4398 tree new_temp;
4400 /* Generate half of the widened result: */
4401 if (code == CALL_EXPR)
4403 /* Target specific support */
4404 if (op_type == binary_op)
4405 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4406 else
4407 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4408 new_temp = make_ssa_name (vec_dest, new_stmt);
4409 gimple_call_set_lhs (new_stmt, new_temp);
4411 else
4413 /* Generic support */
4414 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4415 if (op_type != binary_op)
4416 vec_oprnd1 = NULL;
4417 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4418 new_temp = make_ssa_name (vec_dest, new_stmt);
4419 gimple_assign_set_lhs (new_stmt, new_temp);
4421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4423 return new_stmt;
4427 /* Get vectorized definitions for loop-based vectorization. For the first
4428 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4429 the scalar operand), and for the rest we get a copy with
4430 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4431 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4432 The vectors are collected into VEC_OPRNDS. */
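/* For example, a single-step narrowing calls this with MULTI_STEP_CVT == 0
   and gets the two consecutive vector defs it needs, while a two-step
   narrowing passes vect_pow2 (multi_step_cvt) - 1 == 1 and gets four,
   since every level of recursion contributes another pair. */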
4434 static void
4435 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4436 vec<tree> *vec_oprnds, int multi_step_cvt)
4438 tree vec_oprnd;
4440 /* Get first vector operand. */
4441 /* All the vector operands except the very first one (that is scalar oprnd)
4442 are stmt copies. */
4443 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4444 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4445 else
4446 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4448 vec_oprnds->quick_push (vec_oprnd);
4450 /* Get second vector operand. */
4451 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4452 vec_oprnds->quick_push (vec_oprnd);
4454 *oprnd = vec_oprnd;
4456 /* For conversion in multiple steps, continue to get operands
4457 recursively. */
4458 if (multi_step_cvt)
4459 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4463 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4464 For multi-step conversions store the resulting vectors and call the function
4465 recursively. */
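/* For example, narrowing int to char with 128-bit vectors is a two-step
   demotion: four V4SI operands are first packed pairwise into two V8HI
   vectors (e.g. with VEC_PACK_TRUNC_EXPR), those are stored back into
   VEC_OPRNDS, and the recursive call packs them into the final V16QI. */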
4467 static void
4468 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4469 int multi_step_cvt, gimple *stmt,
4470 vec<tree> vec_dsts,
4471 gimple_stmt_iterator *gsi,
4472 slp_tree slp_node, enum tree_code code,
4473 stmt_vec_info *prev_stmt_info)
4475 unsigned int i;
4476 tree vop0, vop1, new_tmp, vec_dest;
4477 gimple *new_stmt;
4478 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4480 vec_dest = vec_dsts.pop ();
4482 for (i = 0; i < vec_oprnds->length (); i += 2)
4484 /* Create demotion operation. */
4485 vop0 = (*vec_oprnds)[i];
4486 vop1 = (*vec_oprnds)[i + 1];
4487 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4488 new_tmp = make_ssa_name (vec_dest, new_stmt);
4489 gimple_assign_set_lhs (new_stmt, new_tmp);
4490 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4492 if (multi_step_cvt)
4493 /* Store the resulting vector for next recursive call. */
4494 (*vec_oprnds)[i/2] = new_tmp;
4495 else
4497 /* This is the last step of the conversion sequence. Store the
4498 vectors in SLP_NODE or in the vector info of the scalar statement
4499 (or in the STMT_VINFO_RELATED_STMT chain). */
4500 if (slp_node)
4501 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4502 else
4504 if (!*prev_stmt_info)
4505 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4506 else
4507 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4509 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4514 /* For multi-step demotion operations we first generate demotion operations
4515 from the source type to the intermediate types, and then combine the
4516 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4517 type. */
4518 if (multi_step_cvt)
4520 /* At each level of recursion we have half of the operands we had at the
4521 previous level. */
4522 vec_oprnds->truncate ((i+1)/2);
4523 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4524 stmt, vec_dsts, gsi, slp_node,
4525 VEC_PACK_TRUNC_EXPR,
4526 prev_stmt_info);
4529 vec_dsts.quick_push (vec_dest);
4533 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4534 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4535 the resulting vectors and call the function recursively. */
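/* For example, widening short to int turns each V8HI operand into two V4SI
   results (via the LO/HI halves, e.g. VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR); the doubled set of vectors replaces the contents of
   VEC_OPRNDS0 so that a further widening step can consume it. */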
4537 static void
4538 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4539 vec<tree> *vec_oprnds1,
4540 gimple *stmt, tree vec_dest,
4541 gimple_stmt_iterator *gsi,
4542 enum tree_code code1,
4543 enum tree_code code2, tree decl1,
4544 tree decl2, int op_type)
4546 int i;
4547 tree vop0, vop1, new_tmp1, new_tmp2;
4548 gimple *new_stmt1, *new_stmt2;
4549 vec<tree> vec_tmp = vNULL;
4551 vec_tmp.create (vec_oprnds0->length () * 2);
4552 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4554 if (op_type == binary_op)
4555 vop1 = (*vec_oprnds1)[i];
4556 else
4557 vop1 = NULL_TREE;
4559 /* Generate the two halves of promotion operation. */
4560 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4561 op_type, vec_dest, gsi, stmt);
4562 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4563 op_type, vec_dest, gsi, stmt);
4564 if (is_gimple_call (new_stmt1))
4566 new_tmp1 = gimple_call_lhs (new_stmt1);
4567 new_tmp2 = gimple_call_lhs (new_stmt2);
4569 else
4571 new_tmp1 = gimple_assign_lhs (new_stmt1);
4572 new_tmp2 = gimple_assign_lhs (new_stmt2);
4575 /* Store the results for the next step. */
4576 vec_tmp.quick_push (new_tmp1);
4577 vec_tmp.quick_push (new_tmp2);
4580 vec_oprnds0->release ();
4581 *vec_oprnds0 = vec_tmp;
4585 /* Check if STMT performs a conversion operation, that can be vectorized.
4586 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4587 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4588 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
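/* For example (a sketch, assuming 128-bit vectors; the arrays are made-up
   names), for

     int a[N]; short b[N];
     for (i = 0; i < n; i++)
       a[i] = (int) b[i];

   the conversion is a WIDEN one: each V8HI vector of b feeds two V4SI
   vectors of a, roughly via vec_unpack_lo/vec_unpack_hi operations. */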
4590 static bool
4591 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4592 gimple **vec_stmt, slp_tree slp_node,
4593 stmt_vector_for_cost *cost_vec)
4595 tree vec_dest;
4596 tree scalar_dest;
4597 tree op0, op1 = NULL_TREE;
4598 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4600 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4601 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4602 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4603 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4604 tree new_temp;
4605 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4606 int ndts = 2;
4607 gimple *new_stmt = NULL;
4608 stmt_vec_info prev_stmt_info;
4609 poly_uint64 nunits_in;
4610 poly_uint64 nunits_out;
4611 tree vectype_out, vectype_in;
4612 int ncopies, i, j;
4613 tree lhs_type, rhs_type;
4614 enum { NARROW, NONE, WIDEN } modifier;
4615 vec<tree> vec_oprnds0 = vNULL;
4616 vec<tree> vec_oprnds1 = vNULL;
4617 tree vop0;
4618 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4619 vec_info *vinfo = stmt_info->vinfo;
4620 int multi_step_cvt = 0;
4621 vec<tree> interm_types = vNULL;
4622 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4623 int op_type;
4624 unsigned short fltsz;
4626 /* Is STMT a vectorizable conversion? */
4628 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4629 return false;
4631 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4632 && ! vec_stmt)
4633 return false;
4635 if (!is_gimple_assign (stmt))
4636 return false;
4638 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4639 return false;
4641 code = gimple_assign_rhs_code (stmt);
4642 if (!CONVERT_EXPR_CODE_P (code)
4643 && code != FIX_TRUNC_EXPR
4644 && code != FLOAT_EXPR
4645 && code != WIDEN_MULT_EXPR
4646 && code != WIDEN_LSHIFT_EXPR)
4647 return false;
4649 op_type = TREE_CODE_LENGTH (code);
4651 /* Check types of lhs and rhs. */
4652 scalar_dest = gimple_assign_lhs (stmt);
4653 lhs_type = TREE_TYPE (scalar_dest);
4654 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4656 op0 = gimple_assign_rhs1 (stmt);
4657 rhs_type = TREE_TYPE (op0);
4659 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4660 && !((INTEGRAL_TYPE_P (lhs_type)
4661 && INTEGRAL_TYPE_P (rhs_type))
4662 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4663 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4664 return false;
4666 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4667 && ((INTEGRAL_TYPE_P (lhs_type)
4668 && !type_has_mode_precision_p (lhs_type))
4669 || (INTEGRAL_TYPE_P (rhs_type)
4670 && !type_has_mode_precision_p (rhs_type))))
4672 if (dump_enabled_p ())
4673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4674 "type conversion to/from bit-precision unsupported."
4675 "\n");
4676 return false;
4679 /* Check the operands of the operation. */
4680 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4682 if (dump_enabled_p ())
4683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4684 "use not simple.\n");
4685 return false;
4687 if (op_type == binary_op)
4689 bool ok;
4691 op1 = gimple_assign_rhs2 (stmt);
4692 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4693 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4694 OP1. */
4695 if (CONSTANT_CLASS_P (op0))
4696 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4697 else
4698 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4700 if (!ok)
4702 if (dump_enabled_p ())
4703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4704 "use not simple.\n");
4705 return false;
4709 /* If op0 is an external or constant def, use a vector type of
4710 the same size as the output vector type. */
4711 if (!vectype_in)
4712 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4713 if (vec_stmt)
4714 gcc_assert (vectype_in);
4715 if (!vectype_in)
4717 if (dump_enabled_p ())
4719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4720 "no vectype for scalar type ");
4721 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4722 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4725 return false;
4728 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4729 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4731 if (dump_enabled_p ())
4733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4734 "can't convert between boolean and non "
4735 "boolean vectors");
4736 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4737 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4740 return false;
4743 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4744 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4745 if (known_eq (nunits_out, nunits_in))
4746 modifier = NONE;
4747 else if (multiple_p (nunits_out, nunits_in))
4748 modifier = NARROW;
4749 else
4751 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4752 modifier = WIDEN;
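  /* For example, with 128-bit vectors: int <-> float keeps four lanes on
     both sides (NONE); int -> short goes from V4SI to V8HI, so nunits_out
     is a multiple of nunits_in (NARROW); short -> int is the reverse
     situation (WIDEN). */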
4755 /* Multiple types in SLP are handled by creating the appropriate number of
4756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4757 case of SLP. */
4758 if (slp_node)
4759 ncopies = 1;
4760 else if (modifier == NARROW)
4761 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4762 else
4763 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4765 /* Sanity check: make sure that at least one copy of the vectorized stmt
4766 needs to be generated. */
4767 gcc_assert (ncopies >= 1);
4769 bool found_mode = false;
4770 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4771 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4772 opt_scalar_mode rhs_mode_iter;
4774 /* Supportable by target? */
4775 switch (modifier)
4777 case NONE:
4778 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4779 return false;
4780 if (supportable_convert_operation (code, vectype_out, vectype_in,
4781 &decl1, &code1))
4782 break;
4783 /* FALLTHRU */
4784 unsupported:
4785 if (dump_enabled_p ())
4786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4787 "conversion not supported by target.\n");
4788 return false;
4790 case WIDEN:
4791 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4792 &code1, &code2, &multi_step_cvt,
4793 &interm_types))
4795 /* Binary widening operation can only be supported directly by the
4796 architecture. */
4797 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4798 break;
4801 if (code != FLOAT_EXPR
4802 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4803 goto unsupported;
4805 fltsz = GET_MODE_SIZE (lhs_mode);
4806 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4808 rhs_mode = rhs_mode_iter.require ();
4809 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4810 break;
4812 cvt_type
4813 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4814 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4815 if (cvt_type == NULL_TREE)
4816 goto unsupported;
4818 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4820 if (!supportable_convert_operation (code, vectype_out,
4821 cvt_type, &decl1, &codecvt1))
4822 goto unsupported;
4824 else if (!supportable_widening_operation (code, stmt, vectype_out,
4825 cvt_type, &codecvt1,
4826 &codecvt2, &multi_step_cvt,
4827 &interm_types))
4828 continue;
4829 else
4830 gcc_assert (multi_step_cvt == 0);
4832 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4833 vectype_in, &code1, &code2,
4834 &multi_step_cvt, &interm_types))
4836 found_mode = true;
4837 break;
4841 if (!found_mode)
4842 goto unsupported;
4844 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4845 codecvt2 = ERROR_MARK;
4846 else
4848 multi_step_cvt++;
4849 interm_types.safe_push (cvt_type);
4850 cvt_type = NULL_TREE;
4852 break;
4854 case NARROW:
4855 gcc_assert (op_type == unary_op);
4856 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4857 &code1, &multi_step_cvt,
4858 &interm_types))
4859 break;
4861 if (code != FIX_TRUNC_EXPR
4862 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4863 goto unsupported;
4865 cvt_type
4866 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4867 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4868 if (cvt_type == NULL_TREE)
4869 goto unsupported;
4870 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4871 &decl1, &codecvt1))
4872 goto unsupported;
4873 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4874 &code1, &multi_step_cvt,
4875 &interm_types))
4876 break;
4877 goto unsupported;
4879 default:
4880 gcc_unreachable ();
4883 if (!vec_stmt) /* transformation not required. */
4885 DUMP_VECT_SCOPE ("vectorizable_conversion");
4886 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4888 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4889 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4890 cost_vec);
4892 else if (modifier == NARROW)
4894 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4895 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4896 cost_vec);
4898 else
4900 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4901 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4902 cost_vec);
4904 interm_types.release ();
4905 return true;
4908 /* Transform. */
4909 if (dump_enabled_p ())
4910 dump_printf_loc (MSG_NOTE, vect_location,
4911 "transform conversion. ncopies = %d.\n", ncopies);
4913 if (op_type == binary_op)
4915 if (CONSTANT_CLASS_P (op0))
4916 op0 = fold_convert (TREE_TYPE (op1), op0);
4917 else if (CONSTANT_CLASS_P (op1))
4918 op1 = fold_convert (TREE_TYPE (op0), op1);
4921 /* In case of multi-step conversion, we first generate conversion operations
4922 to the intermediate types, and then from those types to the final one.
4923 We create vector destinations for the intermediate types (TYPES) received
4924 from supportable_*_operation, and store them in the correct order
4925 for future use in vect_create_vectorized_*_stmts (). */
4926 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4927 vec_dest = vect_create_destination_var (scalar_dest,
4928 (cvt_type && modifier == WIDEN)
4929 ? cvt_type : vectype_out);
4930 vec_dsts.quick_push (vec_dest);
4932 if (multi_step_cvt)
4934 for (i = interm_types.length () - 1;
4935 interm_types.iterate (i, &intermediate_type); i--)
4937 vec_dest = vect_create_destination_var (scalar_dest,
4938 intermediate_type);
4939 vec_dsts.quick_push (vec_dest);
4943 if (cvt_type)
4944 vec_dest = vect_create_destination_var (scalar_dest,
4945 modifier == WIDEN
4946 ? vectype_out : cvt_type);
4948 if (!slp_node)
4950 if (modifier == WIDEN)
4952 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4953 if (op_type == binary_op)
4954 vec_oprnds1.create (1);
4956 else if (modifier == NARROW)
4957 vec_oprnds0.create (
4958 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4960 else if (code == WIDEN_LSHIFT_EXPR)
4961 vec_oprnds1.create (slp_node->vec_stmts_size);
4963 last_oprnd = op0;
4964 prev_stmt_info = NULL;
4965 switch (modifier)
4967 case NONE:
4968 for (j = 0; j < ncopies; j++)
4970 if (j == 0)
4971 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4972 else
4973 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4975 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4977 /* Arguments are ready, create the new vector stmt. */
4978 if (code1 == CALL_EXPR)
4980 new_stmt = gimple_build_call (decl1, 1, vop0);
4981 new_temp = make_ssa_name (vec_dest, new_stmt);
4982 gimple_call_set_lhs (new_stmt, new_temp);
4984 else
4986 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4987 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4988 new_temp = make_ssa_name (vec_dest, new_stmt);
4989 gimple_assign_set_lhs (new_stmt, new_temp);
4992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4993 if (slp_node)
4994 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4995 else
4997 if (!prev_stmt_info)
4998 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4999 else
5000 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5001 prev_stmt_info = vinfo_for_stmt (new_stmt);
5005 break;
5007 case WIDEN:
5008 /* In case the vectorization factor (VF) is bigger than the number
5009 of elements that we can fit in a vectype (nunits), we have to
5010 generate more than one vector stmt, i.e. we need to "unroll"
5011 the vector stmt by a factor of VF/nunits. */
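   /* For example, widening short to int with 128-bit vectors and VF == 16
      gives ncopies == 2 (VF divided by the 8 lanes of vectype_in); each
      copy takes one V8HI def and expands it into two V4SI results, so the
      16 scalar lanes end up in four output vectors. */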
5012 for (j = 0; j < ncopies; j++)
5014 /* Handle uses. */
5015 if (j == 0)
5017 if (slp_node)
5019 if (code == WIDEN_LSHIFT_EXPR)
5021 unsigned int k;
5023 vec_oprnd1 = op1;
5024 /* Store vec_oprnd1 for every vector stmt to be created
5025 for SLP_NODE. We check during the analysis that all
5026 the shift arguments are the same. */
5027 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5028 vec_oprnds1.quick_push (vec_oprnd1);
5030 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5031 slp_node);
5033 else
5034 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
5035 &vec_oprnds1, slp_node);
5037 else
5039 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
5040 vec_oprnds0.quick_push (vec_oprnd0);
5041 if (op_type == binary_op)
5043 if (code == WIDEN_LSHIFT_EXPR)
5044 vec_oprnd1 = op1;
5045 else
5046 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
5047 vec_oprnds1.quick_push (vec_oprnd1);
5051 else
5053 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
5054 vec_oprnds0.truncate (0);
5055 vec_oprnds0.quick_push (vec_oprnd0);
5056 if (op_type == binary_op)
5058 if (code == WIDEN_LSHIFT_EXPR)
5059 vec_oprnd1 = op1;
5060 else
5061 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5062 vec_oprnd1);
5063 vec_oprnds1.truncate (0);
5064 vec_oprnds1.quick_push (vec_oprnd1);
5068 /* Arguments are ready. Create the new vector stmts. */
5069 for (i = multi_step_cvt; i >= 0; i--)
5071 tree this_dest = vec_dsts[i];
5072 enum tree_code c1 = code1, c2 = code2;
5073 if (i == 0 && codecvt2 != ERROR_MARK)
5075 c1 = codecvt1;
5076 c2 = codecvt2;
5078 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5079 &vec_oprnds1,
5080 stmt, this_dest, gsi,
5081 c1, c2, decl1, decl2,
5082 op_type);
5085 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5087 if (cvt_type)
5089 if (codecvt1 == CALL_EXPR)
5091 new_stmt = gimple_build_call (decl1, 1, vop0);
5092 new_temp = make_ssa_name (vec_dest, new_stmt);
5093 gimple_call_set_lhs (new_stmt, new_temp);
5095 else
5097 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5098 new_temp = make_ssa_name (vec_dest);
5099 new_stmt = gimple_build_assign (new_temp, codecvt1,
5100 vop0);
5103 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5105 else
5106 new_stmt = SSA_NAME_DEF_STMT (vop0);
5108 if (slp_node)
5109 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5110 else
5112 if (!prev_stmt_info)
5113 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5114 else
5115 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5116 prev_stmt_info = vinfo_for_stmt (new_stmt);
5121 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5122 break;
5124 case NARROW:
5125 /* In case the vectorization factor (VF) is bigger than the number
5126 of elements that we can fit in a vectype (nunits), we have to
5127 generate more than one vector stmt, i.e. we need to "unroll"
5128 the vector stmt by a factor of VF/nunits. */
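   /* For example, narrowing int to short with 128-bit vectors and VF == 8
      gives ncopies == 1 (computed from vectype_out, V8HI); each iteration
      of the j-loop gathers two V4SI defs via vect_get_loop_based_defs and
      packs them into a single V8HI result. */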
5129 for (j = 0; j < ncopies; j++)
5131 /* Handle uses. */
5132 if (slp_node)
5133 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5134 slp_node);
5135 else
5137 vec_oprnds0.truncate (0);
5138 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5139 vect_pow2 (multi_step_cvt) - 1);
5142 /* Arguments are ready. Create the new vector stmts. */
5143 if (cvt_type)
5144 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5146 if (codecvt1 == CALL_EXPR)
5148 new_stmt = gimple_build_call (decl1, 1, vop0);
5149 new_temp = make_ssa_name (vec_dest, new_stmt);
5150 gimple_call_set_lhs (new_stmt, new_temp);
5152 else
5154 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5155 new_temp = make_ssa_name (vec_dest);
5156 new_stmt = gimple_build_assign (new_temp, codecvt1,
5157 vop0);
5160 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5161 vec_oprnds0[i] = new_temp;
5164 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5165 stmt, vec_dsts, gsi,
5166 slp_node, code1,
5167 &prev_stmt_info);
5170 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5171 break;
5174 vec_oprnds0.release ();
5175 vec_oprnds1.release ();
5176 interm_types.release ();
5178 return true;
5182 /* Function vectorizable_assignment.
5184 Check if STMT performs an assignment (copy) that can be vectorized.
5185 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5186 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5187 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
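/* For example, a same-width conversion such as

     unsigned int a[N]; int b[N];
     a[i] = (unsigned int) b[i];

   (a sketch; the arrays are made-up names) changes neither the number of
   lanes nor the vector size, so it is handled here as a plain vector copy
   with a VIEW_CONVERT_EXPR wrapped around the operand. */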
5189 static bool
5190 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5191 gimple **vec_stmt, slp_tree slp_node,
5192 stmt_vector_for_cost *cost_vec)
5194 tree vec_dest;
5195 tree scalar_dest;
5196 tree op;
5197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5198 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5199 tree new_temp;
5200 enum vect_def_type dt[1] = {vect_unknown_def_type};
5201 int ndts = 1;
5202 int ncopies;
5203 int i, j;
5204 vec<tree> vec_oprnds = vNULL;
5205 tree vop;
5206 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5207 vec_info *vinfo = stmt_info->vinfo;
5208 gimple *new_stmt = NULL;
5209 stmt_vec_info prev_stmt_info = NULL;
5210 enum tree_code code;
5211 tree vectype_in;
5213 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5214 return false;
5216 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5217 && ! vec_stmt)
5218 return false;
5220 /* Is vectorizable assignment? */
5221 if (!is_gimple_assign (stmt))
5222 return false;
5224 scalar_dest = gimple_assign_lhs (stmt);
5225 if (TREE_CODE (scalar_dest) != SSA_NAME)
5226 return false;
5228 code = gimple_assign_rhs_code (stmt);
5229 if (gimple_assign_single_p (stmt)
5230 || code == PAREN_EXPR
5231 || CONVERT_EXPR_CODE_P (code))
5232 op = gimple_assign_rhs1 (stmt);
5233 else
5234 return false;
5236 if (code == VIEW_CONVERT_EXPR)
5237 op = TREE_OPERAND (op, 0);
5239 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5240 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5242 /* Multiple types in SLP are handled by creating the appropriate number of
5243 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5244 case of SLP. */
5245 if (slp_node)
5246 ncopies = 1;
5247 else
5248 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5250 gcc_assert (ncopies >= 1);
5252 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5254 if (dump_enabled_p ())
5255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5256 "use not simple.\n");
5257 return false;
5260 /* We can handle NOP_EXPR conversions that do not change the number
5261 of elements or the vector size. */
5262 if ((CONVERT_EXPR_CODE_P (code)
5263 || code == VIEW_CONVERT_EXPR)
5264 && (!vectype_in
5265 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5266 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5267 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5268 return false;
5270 /* We do not handle bit-precision changes. */
5271 if ((CONVERT_EXPR_CODE_P (code)
5272 || code == VIEW_CONVERT_EXPR)
5273 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5274 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5275 || !type_has_mode_precision_p (TREE_TYPE (op)))
5276 /* But a conversion that does not change the bit-pattern is ok. */
5277 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5278 > TYPE_PRECISION (TREE_TYPE (op)))
5279 && TYPE_UNSIGNED (TREE_TYPE (op)))
5280 /* Conversion between boolean types of different sizes is
5281 a simple assignment if their vectypes are the same
5282 boolean vectors. */
5283 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5284 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5286 if (dump_enabled_p ())
5287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5288 "type conversion to/from bit-precision "
5289 "unsupported.\n");
5290 return false;
5293 if (!vec_stmt) /* transformation not required. */
5295 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5296 DUMP_VECT_SCOPE ("vectorizable_assignment");
5297 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5298 return true;
5301 /* Transform. */
5302 if (dump_enabled_p ())
5303 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5305 /* Handle def. */
5306 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5308 /* Handle use. */
5309 for (j = 0; j < ncopies; j++)
5311 /* Handle uses. */
5312 if (j == 0)
5313 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5314 else
5315 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5317 /* Arguments are ready. Create the new vector stmt. */
5318 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5320 if (CONVERT_EXPR_CODE_P (code)
5321 || code == VIEW_CONVERT_EXPR)
5322 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5323 new_stmt = gimple_build_assign (vec_dest, vop);
5324 new_temp = make_ssa_name (vec_dest, new_stmt);
5325 gimple_assign_set_lhs (new_stmt, new_temp);
5326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5327 if (slp_node)
5328 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5331 if (slp_node)
5332 continue;
5334 if (j == 0)
5335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5336 else
5337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5339 prev_stmt_info = vinfo_for_stmt (new_stmt);
5342 vec_oprnds.release ();
5343 return true;
5347 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5348 either as shift by a scalar or by a vector. */
5350 bool
5351 vect_supportable_shift (enum tree_code code, tree scalar_type)
5354 machine_mode vec_mode;
5355 optab optab;
5356 int icode;
5357 tree vectype;
5359 vectype = get_vectype_for_scalar_type (scalar_type);
5360 if (!vectype)
5361 return false;
5363 optab = optab_for_tree_code (code, vectype, optab_scalar);
5364 if (!optab
5365 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5367 optab = optab_for_tree_code (code, vectype, optab_vector);
5368 if (!optab
5369 || (optab_handler (optab, TYPE_MODE (vectype))
5370 == CODE_FOR_nothing))
5371 return false;
5374 vec_mode = TYPE_MODE (vectype);
5375 icode = (int) optab_handler (optab, vec_mode);
5376 if (icode == CODE_FOR_nothing)
5377 return false;
5379 return true;
5383 /* Function vectorizable_shift.
5385 Check if STMT performs a shift operation that can be vectorized.
5386 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5387 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5388 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
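/* For example (a sketch; the arrays are made-up names):

     a[i] = b[i] << 3;      invariant amount  -> vector/scalar shift optab
     a[i] = b[i] << c[i];   per-element amount -> vector/vector shift optab

   The scalar_shift_arg logic below decides which form, and hence which
   optab, applies. */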
5390 static bool
5391 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5392 gimple **vec_stmt, slp_tree slp_node,
5393 stmt_vector_for_cost *cost_vec)
5395 tree vec_dest;
5396 tree scalar_dest;
5397 tree op0, op1 = NULL;
5398 tree vec_oprnd1 = NULL_TREE;
5399 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5400 tree vectype;
5401 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5402 enum tree_code code;
5403 machine_mode vec_mode;
5404 tree new_temp;
5405 optab optab;
5406 int icode;
5407 machine_mode optab_op2_mode;
5408 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5409 int ndts = 2;
5410 gimple *new_stmt = NULL;
5411 stmt_vec_info prev_stmt_info;
5412 poly_uint64 nunits_in;
5413 poly_uint64 nunits_out;
5414 tree vectype_out;
5415 tree op1_vectype;
5416 int ncopies;
5417 int j, i;
5418 vec<tree> vec_oprnds0 = vNULL;
5419 vec<tree> vec_oprnds1 = vNULL;
5420 tree vop0, vop1;
5421 unsigned int k;
5422 bool scalar_shift_arg = true;
5423 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5424 vec_info *vinfo = stmt_info->vinfo;
5426 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5427 return false;
5429 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5430 && ! vec_stmt)
5431 return false;
5433 /* Is STMT a vectorizable binary/unary operation? */
5434 if (!is_gimple_assign (stmt))
5435 return false;
5437 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5438 return false;
5440 code = gimple_assign_rhs_code (stmt);
5442 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5443 || code == RROTATE_EXPR))
5444 return false;
5446 scalar_dest = gimple_assign_lhs (stmt);
5447 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5448 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5450 if (dump_enabled_p ())
5451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5452 "bit-precision shifts not supported.\n");
5453 return false;
5456 op0 = gimple_assign_rhs1 (stmt);
5457 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5459 if (dump_enabled_p ())
5460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5461 "use not simple.\n");
5462 return false;
5464 /* If op0 is an external or constant def, use a vector type with
5465 the same size as the output vector type. */
5466 if (!vectype)
5467 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5468 if (vec_stmt)
5469 gcc_assert (vectype);
5470 if (!vectype)
5472 if (dump_enabled_p ())
5473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5474 "no vectype for scalar type\n");
5475 return false;
5478 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5479 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5480 if (maybe_ne (nunits_out, nunits_in))
5481 return false;
5483 op1 = gimple_assign_rhs2 (stmt);
5484 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype))
5486 if (dump_enabled_p ())
5487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5488 "use not simple.\n");
5489 return false;
5492 /* Multiple types in SLP are handled by creating the appropriate number of
5493 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5494 case of SLP. */
5495 if (slp_node)
5496 ncopies = 1;
5497 else
5498 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5500 gcc_assert (ncopies >= 1);
5502 /* Determine whether the shift amount is a vector, or scalar. If the
5503 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5505 if ((dt[1] == vect_internal_def
5506 || dt[1] == vect_induction_def)
5507 && !slp_node)
5508 scalar_shift_arg = false;
5509 else if (dt[1] == vect_constant_def
5510 || dt[1] == vect_external_def
5511 || dt[1] == vect_internal_def)
5513 /* In SLP, we need to check whether the shift count is the same;
5514 in loops, if it is a constant or invariant, it is always
5515 a scalar shift. */
5516 if (slp_node)
5518 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5519 gimple *slpstmt;
5521 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5522 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5523 scalar_shift_arg = false;
5526 /* If the shift amount is computed by a pattern stmt we cannot
5527 use the scalar amount directly, thus give up and use a vector
5528 shift. */
5529 if (dt[1] == vect_internal_def)
5531 gimple *def = SSA_NAME_DEF_STMT (op1);
5532 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5533 scalar_shift_arg = false;
5536 else
5538 if (dump_enabled_p ())
5539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5540 "operand mode requires invariant argument.\n");
5541 return false;
5544 /* Vector shifted by vector. */
5545 if (!scalar_shift_arg)
5547 optab = optab_for_tree_code (code, vectype, optab_vector);
5548 if (dump_enabled_p ())
5549 dump_printf_loc (MSG_NOTE, vect_location,
5550 "vector/vector shift/rotate found.\n");
5552 if (!op1_vectype)
5553 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5554 if (op1_vectype == NULL_TREE
5555 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5557 if (dump_enabled_p ())
5558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5559 "unusable type for last operand in"
5560 " vector/vector shift/rotate.\n");
5561 return false;
5564 /* See if the machine has a vector shifted by scalar insn and if not
5565 then see if it has a vector shifted by vector insn. */
5566 else
5568 optab = optab_for_tree_code (code, vectype, optab_scalar);
5569 if (optab
5570 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_NOTE, vect_location,
5574 "vector/scalar shift/rotate found.\n");
5576 else
5578 optab = optab_for_tree_code (code, vectype, optab_vector);
5579 if (optab
5580 && (optab_handler (optab, TYPE_MODE (vectype))
5581 != CODE_FOR_nothing))
5583 scalar_shift_arg = false;
5585 if (dump_enabled_p ())
5586 dump_printf_loc (MSG_NOTE, vect_location,
5587 "vector/vector shift/rotate found.\n");
5589 /* Unlike the other binary operators, shifts/rotates have
5590 an rhs of type int instead of the same type as the lhs,
5591 so make sure the scalar is the right type if we are
5592 dealing with vectors of long long/long/short/char. */
5593 if (dt[1] == vect_constant_def)
5594 op1 = fold_convert (TREE_TYPE (vectype), op1);
5595 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5596 TREE_TYPE (op1)))
5598 if (slp_node
5599 && TYPE_MODE (TREE_TYPE (vectype))
5600 != TYPE_MODE (TREE_TYPE (op1)))
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "unusable type for last operand in"
5605 " vector/vector shift/rotate.\n");
5606 return false;
5608 if (vec_stmt && !slp_node)
5610 op1 = fold_convert (TREE_TYPE (vectype), op1);
5611 op1 = vect_init_vector (stmt, op1,
5612 TREE_TYPE (vectype), NULL);
5619 /* Supportable by target? */
5620 if (!optab)
5622 if (dump_enabled_p ())
5623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5624 "no optab.\n");
5625 return false;
5627 vec_mode = TYPE_MODE (vectype);
5628 icode = (int) optab_handler (optab, vec_mode);
5629 if (icode == CODE_FOR_nothing)
5631 if (dump_enabled_p ())
5632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5633 "op not supported by target.\n");
5634 /* Check only during analysis. */
5635 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5636 || (!vec_stmt
5637 && !vect_worthwhile_without_simd_p (vinfo, code)))
5638 return false;
5639 if (dump_enabled_p ())
5640 dump_printf_loc (MSG_NOTE, vect_location,
5641 "proceeding using word mode.\n");
5644 /* Worthwhile without SIMD support? Check only during analysis. */
5645 if (!vec_stmt
5646 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5647 && !vect_worthwhile_without_simd_p (vinfo, code))
5649 if (dump_enabled_p ())
5650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5651 "not worthwhile without SIMD support.\n");
5652 return false;
5655 if (!vec_stmt) /* transformation not required. */
5657 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5658 DUMP_VECT_SCOPE ("vectorizable_shift");
5659 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5660 return true;
5663 /* Transform. */
5665 if (dump_enabled_p ())
5666 dump_printf_loc (MSG_NOTE, vect_location,
5667 "transform binary/unary operation.\n");
5669 /* Handle def. */
5670 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5672 prev_stmt_info = NULL;
5673 for (j = 0; j < ncopies; j++)
5675 /* Handle uses. */
5676 if (j == 0)
5678 if (scalar_shift_arg)
5680 /* Vector shl and shr insn patterns can be defined with scalar
5681 operand 2 (shift operand). In this case, use constant or loop
5682 invariant op1 directly, without extending it to vector mode
5683 first. */
5684 optab_op2_mode = insn_data[icode].operand[2].mode;
5685 if (!VECTOR_MODE_P (optab_op2_mode))
5687 if (dump_enabled_p ())
5688 dump_printf_loc (MSG_NOTE, vect_location,
5689 "operand 1 using scalar mode.\n");
5690 vec_oprnd1 = op1;
5691 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5692 vec_oprnds1.quick_push (vec_oprnd1);
5693 if (slp_node)
5695 /* Store vec_oprnd1 for every vector stmt to be created
5696 for SLP_NODE. We check during the analysis that all
5697 the shift arguments are the same.
5698 TODO: Allow different constants for different vector
5699 stmts generated for an SLP instance. */
5700 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5701 vec_oprnds1.quick_push (vec_oprnd1);
5706 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5707 (a special case for certain kinds of vector shifts); otherwise,
5708 operand 1 should be of a vector type (the usual case). */
5709 if (vec_oprnd1)
5710 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5711 slp_node);
5712 else
5713 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5714 slp_node);
5716 else
5717 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5719 /* Arguments are ready. Create the new vector stmt. */
5720 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5722 vop1 = vec_oprnds1[i];
5723 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5724 new_temp = make_ssa_name (vec_dest, new_stmt);
5725 gimple_assign_set_lhs (new_stmt, new_temp);
5726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5727 if (slp_node)
5728 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5731 if (slp_node)
5732 continue;
5734 if (j == 0)
5735 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5736 else
5737 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5738 prev_stmt_info = vinfo_for_stmt (new_stmt);
5741 vec_oprnds0.release ();
5742 vec_oprnds1.release ();
5744 return true;
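/* For illustration, the scalar_shift_arg decision above distinguishes
   loops such as

     for (i = 0; i < n; i++)
       a[i] = b[i] << k;        <-- invariant amount, vector/scalar optab

   where the single scalar K can feed operand 2 of the insn directly, from

     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];     <-- per-lane amount, vector/vector optab

   which needs a full vector of shift amounts.  */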
5748 /* Function vectorizable_operation.
5750 Check if STMT performs a binary, unary or ternary operation that can
5751 be vectorized.
5752 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5753 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5754 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5756 static bool
5757 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5758 gimple **vec_stmt, slp_tree slp_node,
5759 stmt_vector_for_cost *cost_vec)
5761 tree vec_dest;
5762 tree scalar_dest;
5763 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5765 tree vectype;
5766 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5767 enum tree_code code, orig_code;
5768 machine_mode vec_mode;
5769 tree new_temp;
5770 int op_type;
5771 optab optab;
5772 bool target_support_p;
5773 enum vect_def_type dt[3]
5774 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5775 int ndts = 3;
5776 gimple *new_stmt = NULL;
5777 stmt_vec_info prev_stmt_info;
5778 poly_uint64 nunits_in;
5779 poly_uint64 nunits_out;
5780 tree vectype_out;
5781 int ncopies;
5782 int j, i;
5783 vec<tree> vec_oprnds0 = vNULL;
5784 vec<tree> vec_oprnds1 = vNULL;
5785 vec<tree> vec_oprnds2 = vNULL;
5786 tree vop0, vop1, vop2;
5787 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5788 vec_info *vinfo = stmt_info->vinfo;
5790 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5791 return false;
5793 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5794 && ! vec_stmt)
5795 return false;
5797 /* Is STMT a vectorizable binary/unary operation? */
5798 if (!is_gimple_assign (stmt))
5799 return false;
5801 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5802 return false;
5804 orig_code = code = gimple_assign_rhs_code (stmt);
5806 /* For pointer addition and subtraction, we should use the normal
5807 plus and minus for the vector operation. */
5808 if (code == POINTER_PLUS_EXPR)
5809 code = PLUS_EXPR;
5810 if (code == POINTER_DIFF_EXPR)
5811 code = MINUS_EXPR;
5813 /* Support only unary or binary operations. */
5814 op_type = TREE_CODE_LENGTH (code);
5815 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5817 if (dump_enabled_p ())
5818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5819 "num. args = %d (not unary/binary/ternary op).\n",
5820 op_type);
5821 return false;
5824 scalar_dest = gimple_assign_lhs (stmt);
5825 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5827 /* Most operations cannot handle bit-precision types without extra
5828 truncations. */
5829 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5830 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5831 /* Exception are bitwise binary operations. */
5832 && code != BIT_IOR_EXPR
5833 && code != BIT_XOR_EXPR
5834 && code != BIT_AND_EXPR)
5836 if (dump_enabled_p ())
5837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5838 "bit-precision arithmetic not supported.\n");
5839 return false;
5842 op0 = gimple_assign_rhs1 (stmt);
5843 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5845 if (dump_enabled_p ())
5846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5847 "use not simple.\n");
5848 return false;
5850 /* If op0 is an external or constant def, use a vector type with
5851 the same size as the output vector type. */
5852 if (!vectype)
5854 /* For boolean type we cannot determine vectype by
5855 invariant value (don't know whether it is a vector
5856 of booleans or vector of integers). We use output
5857 vectype because operations on booleans don't change the
5858 type. */
5859 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5861 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5863 if (dump_enabled_p ())
5864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5865 "not supported operation on bool value.\n");
5866 return false;
5868 vectype = vectype_out;
5870 else
5871 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5873 if (vec_stmt)
5874 gcc_assert (vectype);
5875 if (!vectype)
5877 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5880 "no vectype for scalar type ");
5881 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5882 TREE_TYPE (op0));
5883 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5886 return false;
5889 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5890 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5891 if (maybe_ne (nunits_out, nunits_in))
5892 return false;
5894 if (op_type == binary_op || op_type == ternary_op)
5896 op1 = gimple_assign_rhs2 (stmt);
5897 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5899 if (dump_enabled_p ())
5900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5901 "use not simple.\n");
5902 return false;
5905 if (op_type == ternary_op)
5907 op2 = gimple_assign_rhs3 (stmt);
5908 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912 "use not simple.\n");
5913 return false;
5917 /* Multiple types in SLP are handled by creating the appropriate number of
5918 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5919 case of SLP. */
5920 if (slp_node)
5921 ncopies = 1;
5922 else
5923 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5925 gcc_assert (ncopies >= 1);
5927 /* Shifts are handled in vectorizable_shift (). */
5928 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5929 || code == RROTATE_EXPR)
5930 return false;
5932 /* Supportable by target? */
5934 vec_mode = TYPE_MODE (vectype);
5935 if (code == MULT_HIGHPART_EXPR)
5936 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5937 else
5939 optab = optab_for_tree_code (code, vectype, optab_default);
5940 if (!optab)
5942 if (dump_enabled_p ())
5943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5944 "no optab.\n");
5945 return false;
5947 target_support_p = (optab_handler (optab, vec_mode)
5948 != CODE_FOR_nothing);
5951 if (!target_support_p)
5953 if (dump_enabled_p ())
5954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5955 "op not supported by target.\n");
5956 /* Check only during analysis. */
5957 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5958 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5959 return false;
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_NOTE, vect_location,
5962 "proceeding using word mode.\n");
5965 /* Worthwhile without SIMD support? Check only during analysis. */
5966 if (!VECTOR_MODE_P (vec_mode)
5967 && !vec_stmt
5968 && !vect_worthwhile_without_simd_p (vinfo, code))
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5972 "not worthwhile without SIMD support.\n");
5973 return false;
5976 if (!vec_stmt) /* transformation not required. */
5978 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5979 DUMP_VECT_SCOPE ("vectorizable_operation");
5980 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5981 return true;
5984 /* Transform. */
5986 if (dump_enabled_p ())
5987 dump_printf_loc (MSG_NOTE, vect_location,
5988 "transform binary/unary operation.\n");
5990 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5991 vectors with unsigned elements, but the result is signed. So, we
5992 need to compute the MINUS_EXPR into a vectype temporary and
5993 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5994 tree vec_cvt_dest = NULL_TREE;
5995 if (orig_code == POINTER_DIFF_EXPR)
5997 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5998 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6000 /* Handle def. */
6001 else
6002 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6004 /* In case the vectorization factor (VF) is bigger than the number
6005 of elements that we can fit in a vectype (nunits), we have to generate
6006 more than one vector stmt, i.e., we need to "unroll" the
6007 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6008 from one copy of the vector stmt to the next, in the field
6009 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6010 stages to find the correct vector defs to be used when vectorizing
6011 stmts that use the defs of the current stmt. The example below
6012 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6013 we need to create 4 vectorized stmts):
6015 before vectorization:
6016 RELATED_STMT VEC_STMT
6017 S1: x = memref - -
6018 S2: z = x + 1 - -
6020 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6021 there):
6022 RELATED_STMT VEC_STMT
6023 VS1_0: vx0 = memref0 VS1_1 -
6024 VS1_1: vx1 = memref1 VS1_2 -
6025 VS1_2: vx2 = memref2 VS1_3 -
6026 VS1_3: vx3 = memref3 - -
6027 S1: x = load - VS1_0
6028 S2: z = x + 1 - -
6030 step2: vectorize stmt S2 (done here):
6031 To vectorize stmt S2 we first need to find the relevant vector
6032 def for the first operand 'x'. This is, as usual, obtained from
6033 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6034 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6035 relevant vector def 'vx0'. Having found 'vx0' we can generate
6036 the vector stmt VS2_0, and as usual, record it in the
6037 STMT_VINFO_VEC_STMT of stmt S2.
6038 When creating the second copy (VS2_1), we obtain the relevant vector
6039 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6040 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6041 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6042 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6043 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6044 chain of stmts and pointers:
6045 RELATED_STMT VEC_STMT
6046 VS1_0: vx0 = memref0 VS1_1 -
6047 VS1_1: vx1 = memref1 VS1_2 -
6048 VS1_2: vx2 = memref2 VS1_3 -
6049 VS1_3: vx3 = memref3 - -
6050 S1: x = load - VS1_0
6051 VS2_0: vz0 = vx0 + v1 VS2_1 -
6052 VS2_1: vz1 = vx1 + v1 VS2_2 -
6053 VS2_2: vz2 = vx2 + v1 VS2_3 -
6054 VS2_3: vz3 = vx3 + v1 - -
6055 S2: z = x + 1 - VS2_0 */
6057 prev_stmt_info = NULL;
6058 for (j = 0; j < ncopies; j++)
6060 /* Handle uses. */
6061 if (j == 0)
6063 if (op_type == binary_op)
6064 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6065 slp_node);
6066 else if (op_type == ternary_op)
6068 if (slp_node)
6070 auto_vec<tree> ops(3);
6071 ops.quick_push (op0);
6072 ops.quick_push (op1);
6073 ops.quick_push (op2);
6074 auto_vec<vec<tree> > vec_defs(3);
6075 vect_get_slp_defs (ops, slp_node, &vec_defs);
6076 vec_oprnds0 = vec_defs[0];
6077 vec_oprnds1 = vec_defs[1];
6078 vec_oprnds2 = vec_defs[2];
6080 else
6082 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6083 NULL);
6084 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6085 NULL);
6088 else
6089 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
6090 slp_node);
6092 else
6094 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6095 if (op_type == ternary_op)
6097 tree vec_oprnd = vec_oprnds2.pop ();
6098 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6099 vec_oprnd));
6103 /* Arguments are ready. Create the new vector stmt. */
6104 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6106 vop1 = ((op_type == binary_op || op_type == ternary_op)
6107 ? vec_oprnds1[i] : NULL_TREE);
6108 vop2 = ((op_type == ternary_op)
6109 ? vec_oprnds2[i] : NULL_TREE);
6110 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6111 new_temp = make_ssa_name (vec_dest, new_stmt);
6112 gimple_assign_set_lhs (new_stmt, new_temp);
6113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6114 if (vec_cvt_dest)
6116 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6117 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6118 new_temp);
6119 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6120 gimple_assign_set_lhs (new_stmt, new_temp);
6121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6123 if (slp_node)
6124 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6127 if (slp_node)
6128 continue;
6130 if (j == 0)
6131 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6132 else
6133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6134 prev_stmt_info = vinfo_for_stmt (new_stmt);
6137 vec_oprnds0.release ();
6138 vec_oprnds1.release ();
6139 vec_oprnds2.release ();
6141 return true;
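/* For illustration: for POINTER_DIFF_EXPR the transform above computes the
   subtraction on the unsigned element type of VECTYPE and only then
   reinterprets the result, roughly

     vect_tmp = vp0 - vp1;                                  (MINUS_EXPR)
     vect_res = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp);

   so the signed ptrdiff result type appears only in the final
   VIEW_CONVERT_EXPR; vect_tmp, vp0 and vp1 are made-up names.  */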
6144 /* A helper function to ensure data reference DR's base alignment. */
6146 static void
6147 ensure_base_align (struct data_reference *dr)
6149 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6150 return;
6152 if (DR_VECT_AUX (dr)->base_misaligned)
6154 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6156 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6158 if (decl_in_symtab_p (base_decl))
6159 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6160 else
6162 SET_DECL_ALIGN (base_decl, align_base_to);
6163 DECL_USER_ALIGN (base_decl) = 1;
6165 DR_VECT_AUX (dr)->base_misaligned = false;
6170 /* Function get_group_alias_ptr_type.
6172 Return the alias type for the group starting at FIRST_STMT. */
6174 static tree
6175 get_group_alias_ptr_type (gimple *first_stmt)
6177 struct data_reference *first_dr, *next_dr;
6178 gimple *next_stmt;
6180 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6181 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6182 while (next_stmt)
6184 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6185 if (get_alias_set (DR_REF (first_dr))
6186 != get_alias_set (DR_REF (next_dr)))
6188 if (dump_enabled_p ())
6189 dump_printf_loc (MSG_NOTE, vect_location,
6190 "conflicting alias set types.\n");
6191 return ptr_type_node;
6193 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6195 return reference_alias_ptr_type (DR_REF (first_dr));
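/* For illustration: if the members of an interleaving group disagree on
   their alias sets, e.g. a group mixing stores such as

     p->f_int = ...;
     p->f_float = ...;

   (field names made up for the example), the function above conservatively
   returns ptr_type_node, which aliases everything, instead of the alias
   pointer type of the first reference.  */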
6199 /* Function vectorizable_store.
6201 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6202 can be vectorized.
6203 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6204 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6205 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6207 static bool
6208 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6209 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
6211 tree data_ref;
6212 tree op;
6213 tree vec_oprnd = NULL_TREE;
6214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6215 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6216 tree elem_type;
6217 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6218 struct loop *loop = NULL;
6219 machine_mode vec_mode;
6220 tree dummy;
6221 enum dr_alignment_support alignment_support_scheme;
6222 enum vect_def_type rhs_dt = vect_unknown_def_type;
6223 enum vect_def_type mask_dt = vect_unknown_def_type;
6224 stmt_vec_info prev_stmt_info = NULL;
6225 tree dataref_ptr = NULL_TREE;
6226 tree dataref_offset = NULL_TREE;
6227 gimple *ptr_incr = NULL;
6228 int ncopies;
6229 int j;
6230 gimple *next_stmt, *first_stmt;
6231 bool grouped_store;
6232 unsigned int group_size, i;
6233 vec<tree> oprnds = vNULL;
6234 vec<tree> result_chain = vNULL;
6235 bool inv_p;
6236 tree offset = NULL_TREE;
6237 vec<tree> vec_oprnds = vNULL;
6238 bool slp = (slp_node != NULL);
6239 unsigned int vec_num;
6240 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6241 vec_info *vinfo = stmt_info->vinfo;
6242 tree aggr_type;
6243 gather_scatter_info gs_info;
6244 gimple *new_stmt;
6245 poly_uint64 vf;
6246 vec_load_store_type vls_type;
6247 tree ref_type;
6249 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6250 return false;
6252 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6253 && ! vec_stmt)
6254 return false;
6256 /* Is vectorizable store? */
6258 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6259 if (is_gimple_assign (stmt))
6261 tree scalar_dest = gimple_assign_lhs (stmt);
6262 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6263 && is_pattern_stmt_p (stmt_info))
6264 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6265 if (TREE_CODE (scalar_dest) != ARRAY_REF
6266 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6267 && TREE_CODE (scalar_dest) != INDIRECT_REF
6268 && TREE_CODE (scalar_dest) != COMPONENT_REF
6269 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6270 && TREE_CODE (scalar_dest) != REALPART_EXPR
6271 && TREE_CODE (scalar_dest) != MEM_REF)
6272 return false;
6274 else
6276 gcall *call = dyn_cast <gcall *> (stmt);
6277 if (!call || !gimple_call_internal_p (call))
6278 return false;
6280 internal_fn ifn = gimple_call_internal_fn (call);
6281 if (!internal_store_fn_p (ifn))
6282 return false;
6284 if (slp_node != NULL)
6286 if (dump_enabled_p ())
6287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6288 "SLP of masked stores not supported.\n");
6289 return false;
6292 int mask_index = internal_fn_mask_index (ifn);
6293 if (mask_index >= 0)
6295 mask = gimple_call_arg (call, mask_index);
6296 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6297 &mask_vectype))
6298 return false;
6302 op = vect_get_store_rhs (stmt);
6304 /* Cannot have hybrid store SLP -- that would mean storing to the
6305 same location twice. */
6306 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6308 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6309 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6311 if (loop_vinfo)
6313 loop = LOOP_VINFO_LOOP (loop_vinfo);
6314 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6316 else
6317 vf = 1;
6319 /* Multiple types in SLP are handled by creating the appropriate number of
6320 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6321 case of SLP. */
6322 if (slp)
6323 ncopies = 1;
6324 else
6325 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6327 gcc_assert (ncopies >= 1);
6329 /* FORNOW. This restriction should be relaxed. */
6330 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6334 "multiple types in nested loop.\n");
6335 return false;
6338 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6339 return false;
6341 elem_type = TREE_TYPE (vectype);
6342 vec_mode = TYPE_MODE (vectype);
6344 if (!STMT_VINFO_DATA_REF (stmt_info))
6345 return false;
6347 vect_memory_access_type memory_access_type;
6348 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6349 &memory_access_type, &gs_info))
6350 return false;
6352 if (mask)
6354 if (memory_access_type == VMAT_CONTIGUOUS)
6356 if (!VECTOR_MODE_P (vec_mode)
6357 || !can_vec_mask_load_store_p (vec_mode,
6358 TYPE_MODE (mask_vectype), false))
6359 return false;
6361 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6362 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6366 "unsupported access type for masked store.\n");
6367 return false;
6370 else
6372 /* FORNOW. In some cases we can vectorize even if the data type is not
6373 supported (e.g. array initialization with 0). */
6374 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6375 return false;
6378 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6379 && memory_access_type != VMAT_GATHER_SCATTER
6380 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6381 if (grouped_store)
6383 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6384 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6385 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
6387 else
6389 first_stmt = stmt;
6390 first_dr = dr;
6391 group_size = vec_num = 1;
6394 if (!vec_stmt) /* transformation not required. */
6396 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6398 if (loop_vinfo
6399 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6400 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6401 memory_access_type, &gs_info);
6403 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6404 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6405 vls_type, slp_node, cost_vec);
6406 return true;
6408 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6410 /* Transform. */
6412 ensure_base_align (dr);
6414 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6416 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6417 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6418 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6419 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6420 edge pe = loop_preheader_edge (loop);
6421 gimple_seq seq;
6422 basic_block new_bb;
6423 enum { NARROW, NONE, WIDEN } modifier;
6424 poly_uint64 scatter_off_nunits
6425 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6427 if (known_eq (nunits, scatter_off_nunits))
6428 modifier = NONE;
6429 else if (known_eq (nunits * 2, scatter_off_nunits))
6431 modifier = WIDEN;
6433 /* Currently gathers and scatters are only supported for
6434 fixed-length vectors. */
6435 unsigned int count = scatter_off_nunits.to_constant ();
6436 vec_perm_builder sel (count, count, 1);
6437 for (i = 0; i < (unsigned int) count; ++i)
6438 sel.quick_push (i | (count / 2));
6440 vec_perm_indices indices (sel, 1, count);
6441 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6442 indices);
6443 gcc_assert (perm_mask != NULL_TREE);
6445 else if (known_eq (nunits, scatter_off_nunits * 2))
6447 modifier = NARROW;
6449 /* Currently gathers and scatters are only supported for
6450 fixed-length vectors. */
6451 unsigned int count = nunits.to_constant ();
6452 vec_perm_builder sel (count, count, 1);
6453 for (i = 0; i < (unsigned int) count; ++i)
6454 sel.quick_push (i | (count / 2));
6456 vec_perm_indices indices (sel, 2, count);
6457 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6458 gcc_assert (perm_mask != NULL_TREE);
6459 ncopies *= 2;
6461 else
6462 gcc_unreachable ();
6464 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6465 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6466 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6467 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6468 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6469 scaletype = TREE_VALUE (arglist);
6471 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6472 && TREE_CODE (rettype) == VOID_TYPE);
6474 ptr = fold_convert (ptrtype, gs_info.base);
6475 if (!is_gimple_min_invariant (ptr))
6477 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6478 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6479 gcc_assert (!new_bb);
6482 /* Currently we support only unconditional scatter stores,
6483 so mask should be all ones. */
6484 mask = build_int_cst (masktype, -1);
6485 mask = vect_init_vector (stmt, mask, masktype, NULL);
6487 scale = build_int_cst (scaletype, gs_info.scale);
6489 prev_stmt_info = NULL;
6490 for (j = 0; j < ncopies; ++j)
6492 if (j == 0)
6494 src = vec_oprnd1
6495 = vect_get_vec_def_for_operand (op, stmt);
6496 op = vec_oprnd0
6497 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6499 else if (modifier != NONE && (j & 1))
6501 if (modifier == WIDEN)
6503 src = vec_oprnd1
6504 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6505 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6506 stmt, gsi);
6508 else if (modifier == NARROW)
6510 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6511 stmt, gsi);
6512 op = vec_oprnd0
6513 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6514 vec_oprnd0);
6516 else
6517 gcc_unreachable ();
6519 else
6521 src = vec_oprnd1
6522 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6523 op = vec_oprnd0
6524 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6525 vec_oprnd0);
6528 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6530 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6531 TYPE_VECTOR_SUBPARTS (srctype)));
6532 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6533 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6534 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6535 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6536 src = var;
6539 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6541 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6542 TYPE_VECTOR_SUBPARTS (idxtype)));
6543 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6544 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6545 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6546 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6547 op = var;
6550 new_stmt
6551 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6555 if (prev_stmt_info == NULL)
6556 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6557 else
6558 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6559 prev_stmt_info = vinfo_for_stmt (new_stmt);
6561 return true;
6564 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6566 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6567 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6570 if (grouped_store)
6572 /* FORNOW */
6573 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6575 /* We vectorize all the stmts of the interleaving group when we
6576 reach the last stmt in the group. */
6577 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6578 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
6579 && !slp)
6581 *vec_stmt = NULL;
6582 return true;
6585 if (slp)
6587 grouped_store = false;
6588 /* VEC_NUM is the number of vect stmts to be created for this
6589 group. */
6590 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6591 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6592 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6593 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6594 op = vect_get_store_rhs (first_stmt);
6596 else
6597 /* VEC_NUM is the number of vect stmts to be created for this
6598 group. */
6599 vec_num = group_size;
6601 ref_type = get_group_alias_ptr_type (first_stmt);
6603 else
6604 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6606 if (dump_enabled_p ())
6607 dump_printf_loc (MSG_NOTE, vect_location,
6608 "transform store. ncopies = %d\n", ncopies);
6610 if (memory_access_type == VMAT_ELEMENTWISE
6611 || memory_access_type == VMAT_STRIDED_SLP)
6613 gimple_stmt_iterator incr_gsi;
6614 bool insert_after;
6615 gimple *incr;
6616 tree offvar;
6617 tree ivstep;
6618 tree running_off;
6619 tree stride_base, stride_step, alias_off;
6620 tree vec_oprnd;
6621 unsigned int g;
6622 /* Checked by get_load_store_type. */
6623 unsigned int const_nunits = nunits.to_constant ();
6625 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6626 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6628 stride_base
6629 = fold_build_pointer_plus
6630 (DR_BASE_ADDRESS (first_dr),
6631 size_binop (PLUS_EXPR,
6632 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6633 convert_to_ptrofftype (DR_INIT (first_dr))));
6634 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6636 /* For a store with loop-invariant (but other than power-of-2)
6637 stride (i.e. not a grouped access) like so:
6639 for (i = 0; i < n; i += stride)
6640 array[i] = ...;
6642 we generate a new induction variable and new stores from
6643 the components of the (vectorized) rhs:
6645 for (j = 0; ; j += VF*stride)
6646 vectemp = ...;
6647 tmp1 = vectemp[0];
6648 array[j] = tmp1;
6649 tmp2 = vectemp[1];
6650 array[j + stride] = tmp2;
6654 unsigned nstores = const_nunits;
6655 unsigned lnel = 1;
6656 tree ltype = elem_type;
6657 tree lvectype = vectype;
6658 if (slp)
6660 if (group_size < const_nunits
6661 && const_nunits % group_size == 0)
6663 nstores = const_nunits / group_size;
6664 lnel = group_size;
6665 ltype = build_vector_type (elem_type, group_size);
6666 lvectype = vectype;
6668 /* First check if vec_extract optab doesn't support extraction
6669 of vector elts directly. */
6670 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6671 machine_mode vmode;
6672 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6673 || !VECTOR_MODE_P (vmode)
6674 || !targetm.vector_mode_supported_p (vmode)
6675 || (convert_optab_handler (vec_extract_optab,
6676 TYPE_MODE (vectype), vmode)
6677 == CODE_FOR_nothing))
6679 /* Try to avoid emitting an extract of vector elements
6680 by performing the extracts using an integer type of the
6681 same size, extracting from a vector of those and then
6682 re-interpreting it as the original vector type if
6683 supported. */
6684 unsigned lsize
6685 = group_size * GET_MODE_BITSIZE (elmode);
6686 elmode = int_mode_for_size (lsize, 0).require ();
6687 unsigned int lnunits = const_nunits / group_size;
6688 /* If we can't construct such a vector, fall back to
6689 element extracts from the original vector type and
6690 element size stores. */
6691 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6692 && VECTOR_MODE_P (vmode)
6693 && targetm.vector_mode_supported_p (vmode)
6694 && (convert_optab_handler (vec_extract_optab,
6695 vmode, elmode)
6696 != CODE_FOR_nothing))
6698 nstores = lnunits;
6699 lnel = group_size;
6700 ltype = build_nonstandard_integer_type (lsize, 1);
6701 lvectype = build_vector_type (ltype, nstores);
6703 /* Else fall back to vector extraction anyway.
6704 Fewer stores are more important than avoiding spilling
6705 of the vector we extract from. Compared to the
6706 construction case in vectorizable_load no store-forwarding
6707 issue exists here for reasonable archs. */
6710 else if (group_size >= const_nunits
6711 && group_size % const_nunits == 0)
6713 nstores = 1;
6714 lnel = const_nunits;
6715 ltype = vectype;
6716 lvectype = vectype;
6718 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6719 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6722 ivstep = stride_step;
6723 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6724 build_int_cst (TREE_TYPE (ivstep), vf));
6726 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6728 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6729 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6730 create_iv (stride_base, ivstep, NULL,
6731 loop, &incr_gsi, insert_after,
6732 &offvar, NULL);
6733 incr = gsi_stmt (incr_gsi);
6734 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6736 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6738 prev_stmt_info = NULL;
6739 alias_off = build_int_cst (ref_type, 0);
6740 next_stmt = first_stmt;
6741 for (g = 0; g < group_size; g++)
6743 running_off = offvar;
6744 if (g)
6746 tree size = TYPE_SIZE_UNIT (ltype);
6747 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6748 size);
6749 tree newoff = copy_ssa_name (running_off, NULL);
6750 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6751 running_off, pos);
6752 vect_finish_stmt_generation (stmt, incr, gsi);
6753 running_off = newoff;
6755 unsigned int group_el = 0;
6756 unsigned HOST_WIDE_INT
6757 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6758 for (j = 0; j < ncopies; j++)
6760 /* We've set op and dt above, from vect_get_store_rhs,
6761 and first_stmt == stmt. */
6762 if (j == 0)
6764 if (slp)
6766 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6767 slp_node);
6768 vec_oprnd = vec_oprnds[0];
6770 else
6772 op = vect_get_store_rhs (next_stmt);
6773 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6776 else
6778 if (slp)
6779 vec_oprnd = vec_oprnds[j];
6780 else
6782 vect_is_simple_use (op, vinfo, &rhs_dt);
6783 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6784 vec_oprnd);
6787 /* Pun the vector to extract from if necessary. */
6788 if (lvectype != vectype)
6790 tree tem = make_ssa_name (lvectype);
6791 gimple *pun
6792 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6793 lvectype, vec_oprnd));
6794 vect_finish_stmt_generation (stmt, pun, gsi);
6795 vec_oprnd = tem;
6797 for (i = 0; i < nstores; i++)
6799 tree newref, newoff;
6800 gimple *incr, *assign;
6801 tree size = TYPE_SIZE (ltype);
6802 /* Extract the i'th component. */
6803 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6804 bitsize_int (i), size);
6805 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6806 size, pos);
6808 elem = force_gimple_operand_gsi (gsi, elem, true,
6809 NULL_TREE, true,
6810 GSI_SAME_STMT);
6812 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6813 group_el * elsz);
6814 newref = build2 (MEM_REF, ltype,
6815 running_off, this_off);
6816 vect_copy_ref_info (newref, DR_REF (first_dr));
6818 /* And store it to *running_off. */
6819 assign = gimple_build_assign (newref, elem);
6820 vect_finish_stmt_generation (stmt, assign, gsi);
6822 group_el += lnel;
6823 if (! slp
6824 || group_el == group_size)
6826 newoff = copy_ssa_name (running_off, NULL);
6827 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6828 running_off, stride_step);
6829 vect_finish_stmt_generation (stmt, incr, gsi);
6831 running_off = newoff;
6832 group_el = 0;
6834 if (g == group_size - 1
6835 && !slp)
6837 if (j == 0 && i == 0)
6838 STMT_VINFO_VEC_STMT (stmt_info)
6839 = *vec_stmt = assign;
6840 else
6841 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6842 prev_stmt_info = vinfo_for_stmt (assign);
6846 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6847 if (slp)
6848 break;
6851 vec_oprnds.release ();
6852 return true;
6855 auto_vec<tree> dr_chain (group_size);
6856 oprnds.create (group_size);
6858 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6859 gcc_assert (alignment_support_scheme);
6860 vec_loop_masks *loop_masks
6861 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6862 ? &LOOP_VINFO_MASKS (loop_vinfo)
6863 : NULL);
6864 /* Targets with store-lane instructions must not require explicit
6865 realignment. vect_supportable_dr_alignment always returns either
6866 dr_aligned or dr_unaligned_supported for masked operations. */
6867 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6868 && !mask
6869 && !loop_masks)
6870 || alignment_support_scheme == dr_aligned
6871 || alignment_support_scheme == dr_unaligned_supported);
6873 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6874 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6875 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6877 tree bump;
6878 tree vec_offset = NULL_TREE;
6879 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6881 aggr_type = NULL_TREE;
6882 bump = NULL_TREE;
6884 else if (memory_access_type == VMAT_GATHER_SCATTER)
6886 aggr_type = elem_type;
6887 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6888 &bump, &vec_offset);
6890 else
6892 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6893 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6894 else
6895 aggr_type = vectype;
6896 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6899 if (mask)
6900 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6902 /* In case the vectorization factor (VF) is bigger than the number
6903 of elements that we can fit in a vectype (nunits), we have to generate
6904 more than one vector stmt, i.e., we need to "unroll" the
6905 vector stmt by a factor VF/nunits. For more details see documentation in
6906 vect_get_vec_def_for_copy_stmt. */
6908 /* In case of interleaving (non-unit grouped access):
6910 S1: &base + 2 = x2
6911 S2: &base = x0
6912 S3: &base + 1 = x1
6913 S4: &base + 3 = x3
6915 We create vectorized stores starting from the base address (the access of
6916 the first stmt in the chain, S2 in the above example), when the last store stmt
6917 of the chain (S4) is reached:
6919 VS1: &base = vx2
6920 VS2: &base + vec_size*1 = vx0
6921 VS3: &base + vec_size*2 = vx1
6922 VS4: &base + vec_size*3 = vx3
6924 Then permutation statements are generated:
6926 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6927 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6930 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6931 (the order of the data-refs in the output of vect_permute_store_chain
6932 corresponds to the order of scalar stmts in the interleaving chain - see
6933 the documentation of vect_permute_store_chain()).
6935 In case of both multiple types and interleaving, the above vector stores and
6936 permutation stmts are created for every copy. The result vector stmts are
6937 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6938 STMT_VINFO_RELATED_STMT for the next copies.
6941 prev_stmt_info = NULL;
6942 tree vec_mask = NULL_TREE;
6943 for (j = 0; j < ncopies; j++)
6946 if (j == 0)
6948 if (slp)
6950 /* Get vectorized arguments for SLP_NODE. */
6951 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6952 NULL, slp_node);
6954 vec_oprnd = vec_oprnds[0];
6956 else
6958 /* For interleaved stores we collect vectorized defs for all the
6959 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6960 used as an input to vect_permute_store_chain(), and OPRNDS as
6961 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6963 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6964 OPRNDS are of size 1. */
6965 next_stmt = first_stmt;
6966 for (i = 0; i < group_size; i++)
6968 /* Since gaps are not supported for interleaved stores,
6969 DR_GROUP_SIZE is the exact number of stmts in the chain.
6970 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6971 there is no interleaving, DR_GROUP_SIZE is 1, and only one
6972 iteration of the loop will be executed. */
6973 op = vect_get_store_rhs (next_stmt);
6974 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6975 dr_chain.quick_push (vec_oprnd);
6976 oprnds.quick_push (vec_oprnd);
6977 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6979 if (mask)
6980 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6981 mask_vectype);
6984 /* We should have caught mismatched types earlier. */
6985 gcc_assert (useless_type_conversion_p (vectype,
6986 TREE_TYPE (vec_oprnd)));
6987 bool simd_lane_access_p
6988 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6989 if (simd_lane_access_p
6990 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6991 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6992 && integer_zerop (DR_OFFSET (first_dr))
6993 && integer_zerop (DR_INIT (first_dr))
6994 && alias_sets_conflict_p (get_alias_set (aggr_type),
6995 get_alias_set (TREE_TYPE (ref_type))))
6997 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6998 dataref_offset = build_int_cst (ref_type, 0);
6999 inv_p = false;
7001 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7003 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
7004 &dataref_ptr, &vec_offset);
7005 inv_p = false;
7007 else
7008 dataref_ptr
7009 = vect_create_data_ref_ptr (first_stmt, aggr_type,
7010 simd_lane_access_p ? loop : NULL,
7011 offset, &dummy, gsi, &ptr_incr,
7012 simd_lane_access_p, &inv_p,
7013 NULL_TREE, bump);
7014 gcc_assert (bb_vinfo || !inv_p);
7016 else
7018 /* For interleaved stores we created vectorized defs for all the
7019 defs stored in OPRNDS in the previous iteration (previous copy).
7020 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7021 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7022 next copy.
7023 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7024 OPRNDS are of size 1. */
7025 for (i = 0; i < group_size; i++)
7027 op = oprnds[i];
7028 vect_is_simple_use (op, vinfo, &rhs_dt);
7029 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
7030 dr_chain[i] = vec_oprnd;
7031 oprnds[i] = vec_oprnd;
7033 if (mask)
7034 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
7035 if (dataref_offset)
7036 dataref_offset
7037 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7038 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7039 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7040 vec_offset);
7041 else
7042 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7043 bump);
7046 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7048 tree vec_array;
7050 /* Get an array into which we can store the individual vectors. */
7051 vec_array = create_vector_array (vectype, vec_num);
7053 /* Invalidate the current contents of VEC_ARRAY. This should
7054 become an RTL clobber too, which prevents the vector registers
7055 from being upward-exposed. */
7056 vect_clobber_variable (stmt, gsi, vec_array);
7058 /* Store the individual vectors into the array. */
7059 for (i = 0; i < vec_num; i++)
7061 vec_oprnd = dr_chain[i];
7062 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
7065 tree final_mask = NULL;
7066 if (loop_masks)
7067 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7068 vectype, j);
7069 if (vec_mask)
7070 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7071 vec_mask, gsi);
7073 gcall *call;
7074 if (final_mask)
7076 /* Emit:
7077 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7078 VEC_ARRAY). */
7079 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7080 tree alias_ptr = build_int_cst (ref_type, align);
7081 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7082 dataref_ptr, alias_ptr,
7083 final_mask, vec_array);
7085 else
7087 /* Emit:
7088 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7089 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7090 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7091 vec_array);
7092 gimple_call_set_lhs (call, data_ref);
7094 gimple_call_set_nothrow (call, true);
7095 new_stmt = call;
7096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7098 /* Record that VEC_ARRAY is now dead. */
7099 vect_clobber_variable (stmt, gsi, vec_array);
7101 else
7103 new_stmt = NULL;
7104 if (grouped_store)
7106 if (j == 0)
7107 result_chain.create (group_size);
7108 /* Permute. */
7109 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7110 &result_chain);
7113 next_stmt = first_stmt;
7114 for (i = 0; i < vec_num; i++)
7116 unsigned align, misalign;
7118 tree final_mask = NULL_TREE;
7119 if (loop_masks)
7120 final_mask = vect_get_loop_mask (gsi, loop_masks,
7121 vec_num * ncopies,
7122 vectype, vec_num * j + i);
7123 if (vec_mask)
7124 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7125 vec_mask, gsi);
7127 if (memory_access_type == VMAT_GATHER_SCATTER)
7129 tree scale = size_int (gs_info.scale);
7130 gcall *call;
7131 if (loop_masks)
7132 call = gimple_build_call_internal
7133 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7134 scale, vec_oprnd, final_mask);
7135 else
7136 call = gimple_build_call_internal
7137 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7138 scale, vec_oprnd);
7139 gimple_call_set_nothrow (call, true);
7140 new_stmt = call;
7141 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7142 break;
7145 if (i > 0)
7146 /* Bump the vector pointer. */
7147 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7148 stmt, bump);
7150 if (slp)
7151 vec_oprnd = vec_oprnds[i];
7152 else if (grouped_store)
7153 /* For grouped stores vectorized defs are interleaved in
7154 vect_permute_store_chain(). */
7155 vec_oprnd = result_chain[i];
7157 align = DR_TARGET_ALIGNMENT (first_dr);
7158 if (aligned_access_p (first_dr))
7159 misalign = 0;
7160 else if (DR_MISALIGNMENT (first_dr) == -1)
7162 align = dr_alignment (vect_dr_behavior (first_dr));
7163 misalign = 0;
7165 else
7166 misalign = DR_MISALIGNMENT (first_dr);
7167 if (dataref_offset == NULL_TREE
7168 && TREE_CODE (dataref_ptr) == SSA_NAME)
7169 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7170 misalign);
7172 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7174 tree perm_mask = perm_mask_for_reverse (vectype);
7175 tree perm_dest
7176 = vect_create_destination_var (vect_get_store_rhs (stmt),
7177 vectype);
7178 tree new_temp = make_ssa_name (perm_dest);
7180 /* Generate the permute statement. */
7181 gimple *perm_stmt
7182 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7183 vec_oprnd, perm_mask);
7184 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7186 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7187 vec_oprnd = new_temp;
7190 /* Arguments are ready. Create the new vector stmt. */
7191 if (final_mask)
7193 align = least_bit_hwi (misalign | align);
7194 tree ptr = build_int_cst (ref_type, align);
7195 gcall *call
7196 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7197 dataref_ptr, ptr,
7198 final_mask, vec_oprnd);
7199 gimple_call_set_nothrow (call, true);
7200 new_stmt = call;
7202 else
7204 data_ref = fold_build2 (MEM_REF, vectype,
7205 dataref_ptr,
7206 dataref_offset
7207 ? dataref_offset
7208 : build_int_cst (ref_type, 0));
7209 if (aligned_access_p (first_dr))
7211 else if (DR_MISALIGNMENT (first_dr) == -1)
7212 TREE_TYPE (data_ref)
7213 = build_aligned_type (TREE_TYPE (data_ref),
7214 align * BITS_PER_UNIT);
7215 else
7216 TREE_TYPE (data_ref)
7217 = build_aligned_type (TREE_TYPE (data_ref),
7218 TYPE_ALIGN (elem_type));
7219 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7220 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7224 if (slp)
7225 continue;
7227 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7228 if (!next_stmt)
7229 break;
7232 if (!slp)
7234 if (j == 0)
7235 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7236 else
7237 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7238 prev_stmt_info = vinfo_for_stmt (new_stmt);
7242 oprnds.release ();
7243 result_chain.release ();
7244 vec_oprnds.release ();
7246 return true;
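/* For illustration: when a store walks backwards through memory, e.g.

     for (i = n - 1; i >= 0; i--)
       a[i] = x[i] + 1;

   and is classified as VMAT_CONTIGUOUS_REVERSE, the code above biases the
   data pointer by -(nunits - 1) elements (the OFFSET computation) and
   permutes each vector value with perm_mask_for_reverse before storing it,
   so that the lanes land at descending addresses.  */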
7249 /* Given a vector type VECTYPE, turn permutation SEL into the equivalent
7250 VECTOR_CST mask. No checks are made that the target platform supports the
7251 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7252 vect_gen_perm_mask_checked. */
7254 tree
7255 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7257 tree mask_type;
7259 poly_uint64 nunits = sel.length ();
7260 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7262 mask_type = build_vector_type (ssizetype, nunits);
7263 return vec_perm_indices_to_tree (mask_type, sel);
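/* For illustration: for a four-element vector a reversing permutation would
   be given as SEL = { 3, 2, 1, 0 }, and the function above encodes it as a
   VECTOR_CST of ssizetype elements, which is the form VEC_PERM_EXPR expects
   for its selector operand.  */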
7266 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7267 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7269 tree
7270 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7272 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7273 return vect_gen_perm_mask_any (vectype, sel);
7276 /* Given vector variables X and Y that were generated for the scalar
7277 STMT, generate instructions to permute the vector elements of X and Y
7278 using permutation mask MASK_VEC, insert them at *GSI and return the
7279 permuted vector variable. */
7281 static tree
7282 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7283 gimple_stmt_iterator *gsi)
7285 tree vectype = TREE_TYPE (x);
7286 tree perm_dest, data_ref;
7287 gimple *perm_stmt;
7289 tree scalar_dest = gimple_get_lhs (stmt);
7290 if (TREE_CODE (scalar_dest) == SSA_NAME)
7291 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7292 else
7293 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7294 data_ref = make_ssa_name (perm_dest);
7296 /* Generate the permute statement. */
7297 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7298 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7300 return data_ref;
7303 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7304 inserting them on the loop's preheader edge. Returns true if we
7305 were successful in doing so (and thus STMT can then be moved),
7306 otherwise returns false. */
7308 static bool
7309 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7311 ssa_op_iter i;
7312 tree op;
7313 bool any = false;
7315 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7317 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7318 if (!gimple_nop_p (def_stmt)
7319 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7321 /* Make sure we don't need to recurse. While we could do
7322 so in simple cases, when there are more complex use webs
7323 we don't have an easy way to preserve stmt order to fulfil
7324 dependencies within them. */
7325 tree op2;
7326 ssa_op_iter i2;
7327 if (gimple_code (def_stmt) == GIMPLE_PHI)
7328 return false;
7329 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7331 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7332 if (!gimple_nop_p (def_stmt2)
7333 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7334 return false;
7336 any = true;
7340 if (!any)
7341 return true;
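/* Now move each in-loop defining statement to the loop preheader edge.  */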
7343 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7345 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7346 if (!gimple_nop_p (def_stmt)
7347 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7349 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7350 gsi_remove (&gsi, false);
7351 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7355 return true;
7358 /* vectorizable_load.
7360 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7361 can be vectorized.
7362 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7363 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7364 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7366 static bool
7367 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7368 slp_tree slp_node, slp_instance slp_node_instance,
7369 stmt_vector_for_cost *cost_vec)
7371 tree scalar_dest;
7372 tree vec_dest = NULL;
7373 tree data_ref = NULL;
7374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7375 stmt_vec_info prev_stmt_info;
7376 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7377 struct loop *loop = NULL;
7378 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7379 bool nested_in_vect_loop = false;
7380 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7381 tree elem_type;
7382 tree new_temp;
7383 machine_mode mode;
7384 gimple *new_stmt = NULL;
7385 tree dummy;
7386 enum dr_alignment_support alignment_support_scheme;
7387 tree dataref_ptr = NULL_TREE;
7388 tree dataref_offset = NULL_TREE;
7389 gimple *ptr_incr = NULL;
7390 int ncopies;
7391 int i, j;
7392 unsigned int group_size;
7393 poly_uint64 group_gap_adj;
7394 tree msq = NULL_TREE, lsq;
7395 tree offset = NULL_TREE;
7396 tree byte_offset = NULL_TREE;
7397 tree realignment_token = NULL_TREE;
7398 gphi *phi = NULL;
7399 vec<tree> dr_chain = vNULL;
7400 bool grouped_load = false;
7401 gimple *first_stmt;
7402 gimple *first_stmt_for_drptr = NULL;
7403 bool inv_p;
7404 bool compute_in_loop = false;
7405 struct loop *at_loop;
7406 int vec_num;
7407 bool slp = (slp_node != NULL);
7408 bool slp_perm = false;
7409 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7410 poly_uint64 vf;
7411 tree aggr_type;
7412 gather_scatter_info gs_info;
7413 vec_info *vinfo = stmt_info->vinfo;
7414 tree ref_type;
7415 enum vect_def_type mask_dt = vect_unknown_def_type;
7417 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7418 return false;
7420 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7421 && ! vec_stmt)
7422 return false;
7424 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7425 if (is_gimple_assign (stmt))
7427 scalar_dest = gimple_assign_lhs (stmt);
7428 if (TREE_CODE (scalar_dest) != SSA_NAME)
7429 return false;
7431 tree_code code = gimple_assign_rhs_code (stmt);
7432 if (code != ARRAY_REF
7433 && code != BIT_FIELD_REF
7434 && code != INDIRECT_REF
7435 && code != COMPONENT_REF
7436 && code != IMAGPART_EXPR
7437 && code != REALPART_EXPR
7438 && code != MEM_REF
7439 && TREE_CODE_CLASS (code) != tcc_declaration)
7440 return false;
7442 else
7444 gcall *call = dyn_cast <gcall *> (stmt);
7445 if (!call || !gimple_call_internal_p (call))
7446 return false;
7448 internal_fn ifn = gimple_call_internal_fn (call);
7449 if (!internal_load_fn_p (ifn))
7450 return false;
7452 scalar_dest = gimple_call_lhs (call);
7453 if (!scalar_dest)
7454 return false;
7456 if (slp_node != NULL)
7458 if (dump_enabled_p ())
7459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7460 "SLP of masked loads not supported.\n");
7461 return false;
7464 int mask_index = internal_fn_mask_index (ifn);
7465 if (mask_index >= 0)
7467 mask = gimple_call_arg (call, mask_index);
7468 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7469 &mask_vectype))
7470 return false;
7474 if (!STMT_VINFO_DATA_REF (stmt_info))
7475 return false;
7477 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7478 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7480 if (loop_vinfo)
7482 loop = LOOP_VINFO_LOOP (loop_vinfo);
7483 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7484 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7486 else
7487 vf = 1;
7489 /* Multiple types in SLP are handled by creating the appropriate number of
7490 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7491 case of SLP. */
7492 if (slp)
7493 ncopies = 1;
7494 else
7495 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7497 gcc_assert (ncopies >= 1);
7499 /* FORNOW. This restriction should be relaxed. */
7500 if (nested_in_vect_loop && ncopies > 1)
7502 if (dump_enabled_p ())
7503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7504 "multiple types in nested loop.\n");
7505 return false;
7508 /* Invalidate assumptions made by dependence analysis when vectorization
7509 on the unrolled body effectively re-orders stmts. */
7510 if (ncopies > 1
7511 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7512 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7513 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7515 if (dump_enabled_p ())
7516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7517 "cannot perform implicit CSE when unrolling "
7518 "with negative dependence distance\n");
7519 return false;
7522 elem_type = TREE_TYPE (vectype);
7523 mode = TYPE_MODE (vectype);
7525 /* FORNOW. In some cases we can vectorize even if the data type is not
7526 supported (e.g. data copies). */
7527 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7529 if (dump_enabled_p ())
7530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7531 "Aligned load, but unsupported type.\n");
7532 return false;
7535 /* Check if the load is a part of an interleaving chain. */
7536 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7538 grouped_load = true;
7539 /* FORNOW */
7540 gcc_assert (!nested_in_vect_loop);
7541 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7543 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7544 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7546 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7547 slp_perm = true;
7549 /* Invalidate assumptions made by dependence analysis when vectorization
7550 on the unrolled body effectively re-orders stmts. */
7551 if (!PURE_SLP_STMT (stmt_info)
7552 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7553 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7554 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7556 if (dump_enabled_p ())
7557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7558 "cannot perform implicit CSE when performing "
7559 "group loads with negative dependence distance\n");
7560 return false;
7563 /* Similarly, when the stmt is a load that is both part of an SLP
7564 instance and a loop-vectorized stmt via the same-dr mechanism,
7565 we have to give up. */
7566 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7567 && (STMT_SLP_TYPE (stmt_info)
7568 != STMT_SLP_TYPE (vinfo_for_stmt
7569 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
7571 if (dump_enabled_p ())
7572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7573 "conflicting SLP types for CSEd load\n");
7574 return false;
7577 else
7578 group_size = 1;
7580 vect_memory_access_type memory_access_type;
7581 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7582 &memory_access_type, &gs_info))
7583 return false;
7585 if (mask)
7587 if (memory_access_type == VMAT_CONTIGUOUS)
7589 machine_mode vec_mode = TYPE_MODE (vectype);
7590 if (!VECTOR_MODE_P (vec_mode)
7591 || !can_vec_mask_load_store_p (vec_mode,
7592 TYPE_MODE (mask_vectype), true))
7593 return false;
7595 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7597 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7598 tree masktype
7599 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7600 if (TREE_CODE (masktype) == INTEGER_TYPE)
7602 if (dump_enabled_p ())
7603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7604 "masked gather with integer mask not"
7605 " supported.");
7606 return false;
7609 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7610 && memory_access_type != VMAT_GATHER_SCATTER)
7612 if (dump_enabled_p ())
7613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7614 "unsupported access type for masked load.\n");
7615 return false;
7619 if (!vec_stmt) /* transformation not required. */
7621 if (!slp)
7622 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7624 if (loop_vinfo
7625 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7626 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7627 memory_access_type, &gs_info);
7629 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7630 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7631 slp_node_instance, slp_node, cost_vec);
7632 return true;
7635 if (!slp)
7636 gcc_assert (memory_access_type
7637 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7639 if (dump_enabled_p ())
7640 dump_printf_loc (MSG_NOTE, vect_location,
7641 "transform load. ncopies = %d\n", ncopies);
7643 /* Transform. */
7645 ensure_base_align (dr);
7647 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7649 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7650 mask_dt);
7651 return true;
7654 if (memory_access_type == VMAT_ELEMENTWISE
7655 || memory_access_type == VMAT_STRIDED_SLP)
7657 gimple_stmt_iterator incr_gsi;
7658 bool insert_after;
7659 gimple *incr;
7660 tree offvar;
7661 tree ivstep;
7662 tree running_off;
7663 vec<constructor_elt, va_gc> *v = NULL;
7664 tree stride_base, stride_step, alias_off;
7665 /* Checked by get_load_store_type. */
7666 unsigned int const_nunits = nunits.to_constant ();
7667 unsigned HOST_WIDE_INT cst_offset = 0;
7669 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7670 gcc_assert (!nested_in_vect_loop);
7672 if (grouped_load)
7674 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7675 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7677 else
7679 first_stmt = stmt;
7680 first_dr = dr;
7682 if (slp && grouped_load)
7684 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7685 ref_type = get_group_alias_ptr_type (first_stmt);
7687 else
7689 if (grouped_load)
7690 cst_offset
7691 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7692 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7693 group_size = 1;
7694 ref_type = reference_alias_ptr_type (DR_REF (dr));
7697 stride_base
7698 = fold_build_pointer_plus
7699 (DR_BASE_ADDRESS (first_dr),
7700 size_binop (PLUS_EXPR,
7701 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7702 convert_to_ptrofftype (DR_INIT (first_dr))));
7703 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7705 /* For a load with loop-invariant (but other than power-of-2)
7706 stride (i.e. not a grouped access) like so:
7708 for (i = 0; i < n; i += stride)
7709 ... = array[i];
7711 we generate a new induction variable and new accesses to
7712 form a new vector (or vectors, depending on ncopies):
7714 for (j = 0; ; j += VF*stride)
7715 tmp1 = array[j];
7716 tmp2 = array[j + stride];
7718 vectemp = {tmp1, tmp2, ...}
7721 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7722 build_int_cst (TREE_TYPE (stride_step), vf));
7724 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7726 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7727 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7728 create_iv (stride_base, ivstep, NULL,
7729 loop, &incr_gsi, insert_after,
7730 &offvar, NULL);
7731 incr = gsi_stmt (incr_gsi);
7732 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7734 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7736 prev_stmt_info = NULL;
7737 running_off = offvar;
7738 alias_off = build_int_cst (ref_type, 0);
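/* NLOADS is the number of loads needed to build one vector stmt, LNEL the
   number of group elements each load covers, and LTYPE/LVECTYPE the types
   used for the individual loads and for the assembled vector.  */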
7739 int nloads = const_nunits;
7740 int lnel = 1;
7741 tree ltype = TREE_TYPE (vectype);
7742 tree lvectype = vectype;
7743 auto_vec<tree> dr_chain;
7744 if (memory_access_type == VMAT_STRIDED_SLP)
7746 if (group_size < const_nunits)
7748 /* First check if vec_init optab supports construction from
7749 vector elts directly. */
7750 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7751 machine_mode vmode;
7752 if (mode_for_vector (elmode, group_size).exists (&vmode)
7753 && VECTOR_MODE_P (vmode)
7754 && targetm.vector_mode_supported_p (vmode)
7755 && (convert_optab_handler (vec_init_optab,
7756 TYPE_MODE (vectype), vmode)
7757 != CODE_FOR_nothing))
7759 nloads = const_nunits / group_size;
7760 lnel = group_size;
7761 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7763 else
7765 /* Otherwise avoid emitting a constructor of vector elements
7766 by performing the loads using an integer type of the same
7767 size, constructing a vector of those and then
7768 re-interpreting it as the original vector type.
7769 This avoids a huge runtime penalty due to the general
7770 inability to perform store forwarding from smaller stores
7771 to a larger load. */
7772 unsigned lsize
7773 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7774 elmode = int_mode_for_size (lsize, 0).require ();
7775 unsigned int lnunits = const_nunits / group_size;
7776 /* If we can't construct such a vector fall back to
7777 element loads of the original vector type. */
7778 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7779 && VECTOR_MODE_P (vmode)
7780 && targetm.vector_mode_supported_p (vmode)
7781 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7782 != CODE_FOR_nothing))
7784 nloads = lnunits;
7785 lnel = group_size;
7786 ltype = build_nonstandard_integer_type (lsize, 1);
7787 lvectype = build_vector_type (ltype, nloads);
7791 else
7793 nloads = 1;
7794 lnel = const_nunits;
7795 ltype = vectype;
7797 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7799 /* If the vectype is a single-element vector (vector(1) scalar_type), load it as a whole. */
7800 else if (nloads == 1)
7801 ltype = vectype;
7803 if (slp)
7805 /* For SLP permutation support we need to load the whole group,
7806 not only the number of vector stmts the permutation result
7807 fits in. */
7808 if (slp_perm)
7810 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7811 variable VF. */
7812 unsigned int const_vf = vf.to_constant ();
7813 ncopies = CEIL (group_size * const_vf, const_nunits);
7814 dr_chain.create (ncopies);
7816 else
7817 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7819 unsigned int group_el = 0;
7820 unsigned HOST_WIDE_INT
7821 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
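/* Emit the loads copy by copy: each copy performs NLOADS loads at RUNNING_OFF
   plus a constant offset and, when more than one load is needed, assembles the
   results with a CONSTRUCTOR (view-converted back to VECTYPE if an integer
   load type was used).  */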
7822 for (j = 0; j < ncopies; j++)
7824 if (nloads > 1)
7825 vec_alloc (v, nloads);
7826 for (i = 0; i < nloads; i++)
7828 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7829 group_el * elsz + cst_offset);
7830 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7831 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7832 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7833 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7834 if (nloads > 1)
7835 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7836 gimple_assign_lhs (new_stmt));
7838 group_el += lnel;
7839 if (! slp
7840 || group_el == group_size)
7842 tree newoff = copy_ssa_name (running_off);
7843 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7844 running_off, stride_step);
7845 vect_finish_stmt_generation (stmt, incr, gsi);
7847 running_off = newoff;
7848 group_el = 0;
7851 if (nloads > 1)
7853 tree vec_inv = build_constructor (lvectype, v);
7854 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7855 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7856 if (lvectype != vectype)
7858 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7859 VIEW_CONVERT_EXPR,
7860 build1 (VIEW_CONVERT_EXPR,
7861 vectype, new_temp));
7862 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7866 if (slp)
7868 if (slp_perm)
7869 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7870 else
7871 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7873 else
7875 if (j == 0)
7876 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7877 else
7878 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7879 prev_stmt_info = vinfo_for_stmt (new_stmt);
7882 if (slp_perm)
7884 unsigned n_perms;
7885 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7886 slp_node_instance, false, &n_perms);
7888 return true;
7891 if (memory_access_type == VMAT_GATHER_SCATTER
7892 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7893 grouped_load = false;
7895 if (grouped_load)
7897 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7898 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7899 /* For SLP vectorization we directly vectorize a subchain
7900 without permutation. */
7901 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7902 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7903 /* For BB vectorization always use the first stmt to base
7904 the data ref pointer on. */
7905 if (bb_vinfo)
7906 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7908 /* Check if the chain of loads is already vectorized. */
7909 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7910 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7911 ??? But we can only do so if there is exactly one
7912 as we have no way to get at the rest. Leave the CSE
7913 opportunity alone.
7914 ??? With the group load eventually participating
7915 in multiple different permutations (having multiple
7916 slp nodes which refer to the same group) the CSE
7917 is even wrong code. See PR56270. */
7918 && !slp)
7920 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7921 return true;
7923 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7924 group_gap_adj = 0;
7926 /* VEC_NUM is the number of vect stmts to be created for this group. */
7927 if (slp)
7929 grouped_load = false;
7930 /* For SLP permutation support we need to load the whole group,
7931 not only the number of vector stmts the permutation result
7932 fits in. */
7933 if (slp_perm)
7935 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7936 variable VF. */
7937 unsigned int const_vf = vf.to_constant ();
7938 unsigned int const_nunits = nunits.to_constant ();
7939 vec_num = CEIL (group_size * const_vf, const_nunits);
7940 group_gap_adj = vf * group_size - nunits * vec_num;
7942 else
7944 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7945 group_gap_adj
7946 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7949 else
7950 vec_num = group_size;
7952 ref_type = get_group_alias_ptr_type (first_stmt);
7954 else
7956 first_stmt = stmt;
7957 first_dr = dr;
7958 group_size = vec_num = 1;
7959 group_gap_adj = 0;
7960 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7963 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7964 gcc_assert (alignment_support_scheme);
7965 vec_loop_masks *loop_masks
7966 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7967 ? &LOOP_VINFO_MASKS (loop_vinfo)
7968 : NULL);
7969 /* Targets with store-lane instructions must not require explicit
7970 realignment. vect_supportable_dr_alignment always returns either
7971 dr_aligned or dr_unaligned_supported for masked operations. */
7972 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7973 && !mask
7974 && !loop_masks)
7975 || alignment_support_scheme == dr_aligned
7976 || alignment_support_scheme == dr_unaligned_supported);
7978 /* In case the vectorization factor (VF) is bigger than the number
7979 of elements that we can fit in a vectype (nunits), we have to generate
7980 more than one vector stmt, i.e. we need to "unroll" the
7981 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7982 from one copy of the vector stmt to the next, in the field
7983 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7984 stages to find the correct vector defs to be used when vectorizing
7985 stmts that use the defs of the current stmt. The example below
7986 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7987 need to create 4 vectorized stmts):
7989 before vectorization:
7990 RELATED_STMT VEC_STMT
7991 S1: x = memref - -
7992 S2: z = x + 1 - -
7994 step 1: vectorize stmt S1:
7995 We first create the vector stmt VS1_0, and, as usual, record a
7996 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7997 Next, we create the vector stmt VS1_1, and record a pointer to
7998 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7999 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8000 stmts and pointers:
8001 RELATED_STMT VEC_STMT
8002 VS1_0: vx0 = memref0 VS1_1 -
8003 VS1_1: vx1 = memref1 VS1_2 -
8004 VS1_2: vx2 = memref2 VS1_3 -
8005 VS1_3: vx3 = memref3 - -
8006 S1: x = load - VS1_0
8007 S2: z = x + 1 - -
8009 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8010 information we recorded in the RELATED_STMT field is used to vectorize
8011 stmt S2. */
8013 /* In case of interleaving (non-unit grouped access):
8015 S1: x2 = &base + 2
8016 S2: x0 = &base
8017 S3: x1 = &base + 1
8018 S4: x3 = &base + 3
8020 Vectorized loads are created in the order of memory accesses
8021 starting from the access of the first stmt of the chain:
8023 VS1: vx0 = &base
8024 VS2: vx1 = &base + vec_size*1
8025 VS3: vx3 = &base + vec_size*2
8026 VS4: vx4 = &base + vec_size*3
8028 Then permutation statements are generated:
8030 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8031 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8034 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8035 (the order of the data-refs in the output of vect_permute_load_chain
8036 corresponds to the order of scalar stmts in the interleaving chain - see
8037 the documentation of vect_permute_load_chain()).
8038 The generation of permutation stmts and recording them in
8039 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8041 In case of both multiple types and interleaving, the vector loads and
8042 permutation stmts above are created for every copy. The result vector
8043 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8044 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8046 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8047 on a target that supports unaligned accesses (dr_unaligned_supported)
8048 we generate the following code:
8049 p = initial_addr;
8050 indx = 0;
8051 loop {
8052 p = p + indx * vectype_size;
8053 vec_dest = *(p);
8054 indx = indx + 1;
8057 Otherwise, the data reference is potentially unaligned on a target that
8058 does not support unaligned accesses (dr_explicit_realign_optimized) -
8059 then generate the following code, in which the data in each iteration is
8060 obtained by two vector loads, one from the previous iteration, and one
8061 from the current iteration:
8062 p1 = initial_addr;
8063 msq_init = *(floor(p1))
8064 p2 = initial_addr + VS - 1;
8065 realignment_token = call target_builtin;
8066 indx = 0;
8067 loop {
8068 p2 = p2 + indx * vectype_size
8069 lsq = *(floor(p2))
8070 vec_dest = realign_load (msq, lsq, realignment_token)
8071 indx = indx + 1;
8072 msq = lsq;
8073 } */
8075 /* If the misalignment remains the same throughout the execution of the
8076 loop, we can create the init_addr and permutation mask at the loop
8077 preheader. Otherwise, it needs to be created inside the loop.
8078 This can only occur when vectorizing memory accesses in the inner-loop
8079 nested within an outer-loop that is being vectorized. */
8081 if (nested_in_vect_loop
8082 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8083 GET_MODE_SIZE (TYPE_MODE (vectype))))
8085 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8086 compute_in_loop = true;
8089 if ((alignment_support_scheme == dr_explicit_realign_optimized
8090 || alignment_support_scheme == dr_explicit_realign)
8091 && !compute_in_loop)
8093 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8094 alignment_support_scheme, NULL_TREE,
8095 &at_loop);
8096 if (alignment_support_scheme == dr_explicit_realign_optimized)
8098 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8099 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8100 size_one_node);
8103 else
8104 at_loop = loop;
8106 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8107 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8109 tree bump;
8110 tree vec_offset = NULL_TREE;
8111 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8113 aggr_type = NULL_TREE;
8114 bump = NULL_TREE;
8116 else if (memory_access_type == VMAT_GATHER_SCATTER)
8118 aggr_type = elem_type;
8119 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8120 &bump, &vec_offset);
8122 else
8124 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8125 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8126 else
8127 aggr_type = vectype;
8128 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8131 tree vec_mask = NULL_TREE;
8132 prev_stmt_info = NULL;
8133 poly_uint64 group_elt = 0;
8134 for (j = 0; j < ncopies; j++)
8136 /* 1. Create the vector or array pointer update chain. */
8137 if (j == 0)
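/* A SIMD-lane access to a plain variable with zero DR_OFFSET and DR_INIT can
   address the object directly through its base address plus a constant
   offset, so no pointer induction variable is needed.  */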
8139 bool simd_lane_access_p
8140 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8141 if (simd_lane_access_p
8142 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8143 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8144 && integer_zerop (DR_OFFSET (first_dr))
8145 && integer_zerop (DR_INIT (first_dr))
8146 && alias_sets_conflict_p (get_alias_set (aggr_type),
8147 get_alias_set (TREE_TYPE (ref_type)))
8148 && (alignment_support_scheme == dr_aligned
8149 || alignment_support_scheme == dr_unaligned_supported))
8151 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8152 dataref_offset = build_int_cst (ref_type, 0);
8153 inv_p = false;
8155 else if (first_stmt_for_drptr
8156 && first_stmt != first_stmt_for_drptr)
8158 dataref_ptr
8159 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8160 at_loop, offset, &dummy, gsi,
8161 &ptr_incr, simd_lane_access_p,
8162 &inv_p, byte_offset, bump);
8163 /* Adjust the pointer by the difference to first_stmt. */
8164 data_reference_p ptrdr
8165 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8166 tree diff = fold_convert (sizetype,
8167 size_binop (MINUS_EXPR,
8168 DR_INIT (first_dr),
8169 DR_INIT (ptrdr)));
8170 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8171 stmt, diff);
8173 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8175 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8176 &dataref_ptr, &vec_offset);
8177 inv_p = false;
8179 else
8180 dataref_ptr
8181 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8182 offset, &dummy, gsi, &ptr_incr,
8183 simd_lane_access_p, &inv_p,
8184 byte_offset, bump);
8185 if (mask)
8186 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8187 mask_vectype);
8189 else
8191 if (dataref_offset)
8192 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8193 bump);
8194 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8195 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8196 vec_offset);
8197 else
8198 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8199 stmt, bump);
8200 if (mask)
8201 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8204 if (grouped_load || slp_perm)
8205 dr_chain.create (vec_num);
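/* For load-lanes a single (possibly masked) internal call fills an array of
   VEC_NUM vectors; each vector is then extracted into its own SSA name and
   recorded in DR_CHAIN.  */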
8207 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8209 tree vec_array;
8211 vec_array = create_vector_array (vectype, vec_num);
8213 tree final_mask = NULL_TREE;
8214 if (loop_masks)
8215 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8216 vectype, j);
8217 if (vec_mask)
8218 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8219 vec_mask, gsi);
8221 gcall *call;
8222 if (final_mask)
8224 /* Emit:
8225 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8226 VEC_MASK). */
8227 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8228 tree alias_ptr = build_int_cst (ref_type, align);
8229 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8230 dataref_ptr, alias_ptr,
8231 final_mask);
8233 else
8235 /* Emit:
8236 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8237 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8238 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8240 gimple_call_set_lhs (call, vec_array);
8241 gimple_call_set_nothrow (call, true);
8242 new_stmt = call;
8243 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8245 /* Extract each vector into an SSA_NAME. */
8246 for (i = 0; i < vec_num; i++)
8248 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8249 vec_array, i);
8250 dr_chain.quick_push (new_temp);
8253 /* Record the mapping between SSA_NAMEs and statements. */
8254 vect_record_grouped_load_vectors (stmt, dr_chain);
8256 /* Record that VEC_ARRAY is now dead. */
8257 vect_clobber_variable (stmt, gsi, vec_array);
8259 else
8261 for (i = 0; i < vec_num; i++)
8263 tree final_mask = NULL_TREE;
8264 if (loop_masks
8265 && memory_access_type != VMAT_INVARIANT)
8266 final_mask = vect_get_loop_mask (gsi, loop_masks,
8267 vec_num * ncopies,
8268 vectype, vec_num * j + i);
8269 if (vec_mask)
8270 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8271 vec_mask, gsi);
8273 if (i > 0)
8274 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8275 stmt, bump);
8277 /* 2. Create the vector-load in the loop. */
8278 switch (alignment_support_scheme)
8280 case dr_aligned:
8281 case dr_unaligned_supported:
8283 unsigned int align, misalign;
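/* A gather load is emitted as an internal call taking the base pointer, a
   vector of offsets and a scale; with loop masking the mask becomes an
   extra argument.  */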
8285 if (memory_access_type == VMAT_GATHER_SCATTER)
8287 tree scale = size_int (gs_info.scale);
8288 gcall *call;
8289 if (loop_masks)
8290 call = gimple_build_call_internal
8291 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8292 vec_offset, scale, final_mask);
8293 else
8294 call = gimple_build_call_internal
8295 (IFN_GATHER_LOAD, 3, dataref_ptr,
8296 vec_offset, scale);
8297 gimple_call_set_nothrow (call, true);
8298 new_stmt = call;
8299 data_ref = NULL_TREE;
8300 break;
8303 align = DR_TARGET_ALIGNMENT (dr);
8304 if (alignment_support_scheme == dr_aligned)
8306 gcc_assert (aligned_access_p (first_dr));
8307 misalign = 0;
8309 else if (DR_MISALIGNMENT (first_dr) == -1)
8311 align = dr_alignment (vect_dr_behavior (first_dr));
8312 misalign = 0;
8314 else
8315 misalign = DR_MISALIGNMENT (first_dr);
8316 if (dataref_offset == NULL_TREE
8317 && TREE_CODE (dataref_ptr) == SSA_NAME)
8318 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8319 align, misalign);
8321 if (final_mask)
8323 align = least_bit_hwi (misalign | align);
8324 tree ptr = build_int_cst (ref_type, align);
8325 gcall *call
8326 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8327 dataref_ptr, ptr,
8328 final_mask);
8329 gimple_call_set_nothrow (call, true);
8330 new_stmt = call;
8331 data_ref = NULL_TREE;
8333 else
8335 data_ref
8336 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8337 dataref_offset
8338 ? dataref_offset
8339 : build_int_cst (ref_type, 0));
8340 if (alignment_support_scheme == dr_aligned)
8342 else if (DR_MISALIGNMENT (first_dr) == -1)
8343 TREE_TYPE (data_ref)
8344 = build_aligned_type (TREE_TYPE (data_ref),
8345 align * BITS_PER_UNIT);
8346 else
8347 TREE_TYPE (data_ref)
8348 = build_aligned_type (TREE_TYPE (data_ref),
8349 TYPE_ALIGN (elem_type));
8351 break;
8353 case dr_explicit_realign:
8355 tree ptr, bump;
8357 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
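/* For explicit realignment both halves of the access are loaded here: MSQ
   from DATAREF_PTR rounded down to the target alignment, and LSQ from the
   address VS - 1 elements further on (also rounded down); they are combined
   with REALIGN_LOAD after the switch.  */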
8359 if (compute_in_loop)
8360 msq = vect_setup_realignment (first_stmt, gsi,
8361 &realignment_token,
8362 dr_explicit_realign,
8363 dataref_ptr, NULL);
8365 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8366 ptr = copy_ssa_name (dataref_ptr);
8367 else
8368 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8369 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8370 new_stmt = gimple_build_assign
8371 (ptr, BIT_AND_EXPR, dataref_ptr,
8372 build_int_cst
8373 (TREE_TYPE (dataref_ptr),
8374 -(HOST_WIDE_INT) align));
8375 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8376 data_ref
8377 = build2 (MEM_REF, vectype, ptr,
8378 build_int_cst (ref_type, 0));
8379 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8380 vec_dest = vect_create_destination_var (scalar_dest,
8381 vectype);
8382 new_stmt = gimple_build_assign (vec_dest, data_ref);
8383 new_temp = make_ssa_name (vec_dest, new_stmt);
8384 gimple_assign_set_lhs (new_stmt, new_temp);
8385 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8386 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8387 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8388 msq = new_temp;
8390 bump = size_binop (MULT_EXPR, vs,
8391 TYPE_SIZE_UNIT (elem_type));
8392 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8393 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8394 new_stmt = gimple_build_assign
8395 (NULL_TREE, BIT_AND_EXPR, ptr,
8396 build_int_cst
8397 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8398 ptr = copy_ssa_name (ptr, new_stmt);
8399 gimple_assign_set_lhs (new_stmt, ptr);
8400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8401 data_ref
8402 = build2 (MEM_REF, vectype, ptr,
8403 build_int_cst (ref_type, 0));
8404 break;
8406 case dr_explicit_realign_optimized:
8408 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8409 new_temp = copy_ssa_name (dataref_ptr);
8410 else
8411 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8412 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8413 new_stmt = gimple_build_assign
8414 (new_temp, BIT_AND_EXPR, dataref_ptr,
8415 build_int_cst (TREE_TYPE (dataref_ptr),
8416 -(HOST_WIDE_INT) align));
8417 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8418 data_ref
8419 = build2 (MEM_REF, vectype, new_temp,
8420 build_int_cst (ref_type, 0));
8421 break;
8423 default:
8424 gcc_unreachable ();
8426 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8427 /* DATA_REF is null if we've already built the statement. */
8428 if (data_ref)
8430 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8431 new_stmt = gimple_build_assign (vec_dest, data_ref);
8433 new_temp = make_ssa_name (vec_dest, new_stmt);
8434 gimple_set_lhs (new_stmt, new_temp);
8435 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8437 /* 3. Handle explicit realignment if necessary/supported.
8438 Create in loop:
8439 vec_dest = realign_load (msq, lsq, realignment_token) */
8440 if (alignment_support_scheme == dr_explicit_realign_optimized
8441 || alignment_support_scheme == dr_explicit_realign)
8443 lsq = gimple_assign_lhs (new_stmt);
8444 if (!realignment_token)
8445 realignment_token = dataref_ptr;
8446 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8447 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8448 msq, lsq, realignment_token);
8449 new_temp = make_ssa_name (vec_dest, new_stmt);
8450 gimple_assign_set_lhs (new_stmt, new_temp);
8451 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8453 if (alignment_support_scheme == dr_explicit_realign_optimized)
8455 gcc_assert (phi);
8456 if (i == vec_num - 1 && j == ncopies - 1)
8457 add_phi_arg (phi, lsq,
8458 loop_latch_edge (containing_loop),
8459 UNKNOWN_LOCATION);
8460 msq = lsq;
8464 /* 4. Handle invariant-load. */
8465 if (inv_p && !bb_vinfo)
8467 gcc_assert (!grouped_load);
8468 /* If we have versioned for aliasing or the loop doesn't
8469 have any data dependencies that would preclude this,
8470 then we are sure this is a loop invariant load and
8471 thus we can insert it on the preheader edge. */
8472 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8473 && !nested_in_vect_loop
8474 && hoist_defs_of_uses (stmt, loop))
8476 if (dump_enabled_p ())
8478 dump_printf_loc (MSG_NOTE, vect_location,
8479 "hoisting out of the vectorized "
8480 "loop: ");
8481 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8483 tree tem = copy_ssa_name (scalar_dest);
8484 gsi_insert_on_edge_immediate
8485 (loop_preheader_edge (loop),
8486 gimple_build_assign (tem,
8487 unshare_expr
8488 (gimple_assign_rhs1 (stmt))));
8489 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8490 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8491 set_vinfo_for_stmt (new_stmt,
8492 new_stmt_vec_info (new_stmt, vinfo));
8494 else
8496 gimple_stmt_iterator gsi2 = *gsi;
8497 gsi_next (&gsi2);
8498 new_temp = vect_init_vector (stmt, scalar_dest,
8499 vectype, &gsi2);
8500 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8504 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8506 tree perm_mask = perm_mask_for_reverse (vectype);
8507 new_temp = permute_vec_elements (new_temp, new_temp,
8508 perm_mask, stmt, gsi);
8509 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8512 /* Collect vector loads and later create their permutation in
8513 vect_transform_grouped_load (). */
8514 if (grouped_load || slp_perm)
8515 dr_chain.quick_push (new_temp);
8517 /* Store vector loads in the corresponding SLP_NODE. */
8518 if (slp && !slp_perm)
8519 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8521 /* With SLP permutation we load the gaps as well; without it
8522 we need to skip the gaps after we manage to fully load
8523 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8524 group_elt += nunits;
8525 if (maybe_ne (group_gap_adj, 0U)
8526 && !slp_perm
8527 && known_eq (group_elt, group_size - group_gap_adj))
8529 poly_wide_int bump_val
8530 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8531 * group_gap_adj);
8532 tree bump = wide_int_to_tree (sizetype, bump_val);
8533 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8534 stmt, bump);
8535 group_elt = 0;
8538 /* Bump the vector pointer to account for a gap or for excess
8539 elements loaded for a permuted SLP load. */
8540 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8542 poly_wide_int bump_val
8543 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8544 * group_gap_adj);
8545 tree bump = wide_int_to_tree (sizetype, bump_val);
8546 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8547 stmt, bump);
8551 if (slp && !slp_perm)
8552 continue;
8554 if (slp_perm)
8556 unsigned n_perms;
8557 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8558 slp_node_instance, false,
8559 &n_perms))
8561 dr_chain.release ();
8562 return false;
8565 else
8567 if (grouped_load)
8569 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8570 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8571 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8573 else
8575 if (j == 0)
8576 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8577 else
8578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8579 prev_stmt_info = vinfo_for_stmt (new_stmt);
8582 dr_chain.release ();
8585 return true;
8588 /* Function vect_is_simple_cond.
8590 Input:
8591 LOOP - the loop that is being vectorized.
8592 COND - Condition that is checked for simple use.
8594 Output:
8595 *COMP_VECTYPE - the vector type for the comparison.
8596 *DTS - The def types for the arguments of the comparison
8598 Returns whether a COND can be vectorized. Checks whether
8599 condition operands are supportable using vect_is_simple_use. */
8601 static bool
8602 vect_is_simple_cond (tree cond, vec_info *vinfo,
8603 tree *comp_vectype, enum vect_def_type *dts,
8604 tree vectype)
8606 tree lhs, rhs;
8607 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8609 /* Mask case. */
8610 if (TREE_CODE (cond) == SSA_NAME
8611 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8613 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8614 || !*comp_vectype
8615 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8616 return false;
8617 return true;
8620 if (!COMPARISON_CLASS_P (cond))
8621 return false;
8623 lhs = TREE_OPERAND (cond, 0);
8624 rhs = TREE_OPERAND (cond, 1);
8626 if (TREE_CODE (lhs) == SSA_NAME)
8628 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8629 return false;
8631 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8632 || TREE_CODE (lhs) == FIXED_CST)
8633 dts[0] = vect_constant_def;
8634 else
8635 return false;
8637 if (TREE_CODE (rhs) == SSA_NAME)
8639 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8640 return false;
8642 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8643 || TREE_CODE (rhs) == FIXED_CST)
8644 dts[1] = vect_constant_def;
8645 else
8646 return false;
8648 if (vectype1 && vectype2
8649 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8650 TYPE_VECTOR_SUBPARTS (vectype2)))
8651 return false;
8653 *comp_vectype = vectype1 ? vectype1 : vectype2;
8654 /* Invariant comparison. */
8655 if (! *comp_vectype && vectype)
8657 tree scalar_type = TREE_TYPE (lhs);
8658 /* If we can widen the comparison to match vectype do so. */
8659 if (INTEGRAL_TYPE_P (scalar_type)
8660 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8661 TYPE_SIZE (TREE_TYPE (vectype))))
8662 scalar_type = build_nonstandard_integer_type
8663 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8664 TYPE_UNSIGNED (scalar_type));
8665 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8668 return true;
8671 /* vectorizable_condition.
8673 Check if STMT is a conditional modify expression that can be vectorized.
8674 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8675 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8676 at GSI.
8678 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8679 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8680 the else clause if it is 2).
8682 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8684 bool
8685 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8686 gimple **vec_stmt, tree reduc_def, int reduc_index,
8687 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8689 tree scalar_dest = NULL_TREE;
8690 tree vec_dest = NULL_TREE;
8691 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8692 tree then_clause, else_clause;
8693 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8694 tree comp_vectype = NULL_TREE;
8695 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8696 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8697 tree vec_compare;
8698 tree new_temp;
8699 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8700 enum vect_def_type dts[4]
8701 = {vect_unknown_def_type, vect_unknown_def_type,
8702 vect_unknown_def_type, vect_unknown_def_type};
8703 int ndts = 4;
8704 int ncopies;
8705 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8706 stmt_vec_info prev_stmt_info = NULL;
8707 int i, j;
8708 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8709 vec<tree> vec_oprnds0 = vNULL;
8710 vec<tree> vec_oprnds1 = vNULL;
8711 vec<tree> vec_oprnds2 = vNULL;
8712 vec<tree> vec_oprnds3 = vNULL;
8713 tree vec_cmp_type;
8714 bool masked = false;
8716 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8717 return false;
8719 vect_reduction_type reduction_type
8720 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8721 if (reduction_type == TREE_CODE_REDUCTION)
8723 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8724 return false;
8726 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8727 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8728 && reduc_def))
8729 return false;
8731 /* FORNOW: not yet supported. */
8732 if (STMT_VINFO_LIVE_P (stmt_info))
8734 if (dump_enabled_p ())
8735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8736 "value used after loop.\n");
8737 return false;
8741 /* Is vectorizable conditional operation? */
8742 if (!is_gimple_assign (stmt))
8743 return false;
8745 code = gimple_assign_rhs_code (stmt);
8747 if (code != COND_EXPR)
8748 return false;
8750 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8751 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8753 if (slp_node)
8754 ncopies = 1;
8755 else
8756 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8758 gcc_assert (ncopies >= 1);
8759 if (reduc_index && ncopies > 1)
8760 return false; /* FORNOW */
8762 cond_expr = gimple_assign_rhs1 (stmt);
8763 then_clause = gimple_assign_rhs2 (stmt);
8764 else_clause = gimple_assign_rhs3 (stmt);
8766 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8767 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8768 || !comp_vectype)
8769 return false;
8771 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8772 return false;
8773 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8774 return false;
8776 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8777 return false;
8779 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8780 return false;
8782 masked = !COMPARISON_CLASS_P (cond_expr);
8783 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8785 if (vec_cmp_type == NULL_TREE)
8786 return false;
8788 cond_code = TREE_CODE (cond_expr);
8789 if (!masked)
8791 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8792 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8795 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8797 /* Boolean values may have another representation in vectors
8798 and therefore we prefer bit operations over comparison for
8799 them (which also works for scalar masks). We store opcodes
8800 to use in bitop1 and bitop2. The statement is vectorized as
8801 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8802 depending on bitop1 and bitop2 arity. */
8803 switch (cond_code)
8805 case GT_EXPR:
8806 bitop1 = BIT_NOT_EXPR;
8807 bitop2 = BIT_AND_EXPR;
8808 break;
8809 case GE_EXPR:
8810 bitop1 = BIT_NOT_EXPR;
8811 bitop2 = BIT_IOR_EXPR;
8812 break;
8813 case LT_EXPR:
8814 bitop1 = BIT_NOT_EXPR;
8815 bitop2 = BIT_AND_EXPR;
8816 std::swap (cond_expr0, cond_expr1);
8817 break;
8818 case LE_EXPR:
8819 bitop1 = BIT_NOT_EXPR;
8820 bitop2 = BIT_IOR_EXPR;
8821 std::swap (cond_expr0, cond_expr1);
8822 break;
8823 case NE_EXPR:
8824 bitop1 = BIT_XOR_EXPR;
8825 break;
8826 case EQ_EXPR:
8827 bitop1 = BIT_XOR_EXPR;
8828 bitop2 = BIT_NOT_EXPR;
8829 break;
8830 default:
8831 return false;
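/* The comparison is now expressed through bit operations; from here on the
   condition is handled like a pre-computed mask.  */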
8833 cond_code = SSA_NAME;
8836 if (!vec_stmt)
8838 if (bitop1 != NOP_EXPR)
8840 machine_mode mode = TYPE_MODE (comp_vectype);
8841 optab optab;
8843 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8844 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8845 return false;
8847 if (bitop2 != NOP_EXPR)
8849 optab = optab_for_tree_code (bitop2, comp_vectype,
8850 optab_default);
8851 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8852 return false;
8855 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8856 cond_code))
8858 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8859 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8860 cost_vec);
8861 return true;
8863 return false;
8866 /* Transform. */
8868 if (!slp_node)
8870 vec_oprnds0.create (1);
8871 vec_oprnds1.create (1);
8872 vec_oprnds2.create (1);
8873 vec_oprnds3.create (1);
8876 /* Handle def. */
8877 scalar_dest = gimple_assign_lhs (stmt);
8878 if (reduction_type != EXTRACT_LAST_REDUCTION)
8879 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8881 /* Handle cond expr. */
8882 for (j = 0; j < ncopies; j++)
8884 gimple *new_stmt = NULL;
8885 if (j == 0)
8887 if (slp_node)
8889 auto_vec<tree, 4> ops;
8890 auto_vec<vec<tree>, 4> vec_defs;
8892 if (masked)
8893 ops.safe_push (cond_expr);
8894 else
8896 ops.safe_push (cond_expr0);
8897 ops.safe_push (cond_expr1);
8899 ops.safe_push (then_clause);
8900 ops.safe_push (else_clause);
8901 vect_get_slp_defs (ops, slp_node, &vec_defs);
8902 vec_oprnds3 = vec_defs.pop ();
8903 vec_oprnds2 = vec_defs.pop ();
8904 if (!masked)
8905 vec_oprnds1 = vec_defs.pop ();
8906 vec_oprnds0 = vec_defs.pop ();
8908 else
8910 if (masked)
8912 vec_cond_lhs
8913 = vect_get_vec_def_for_operand (cond_expr, stmt,
8914 comp_vectype);
8915 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
8917 else
8919 vec_cond_lhs
8920 = vect_get_vec_def_for_operand (cond_expr0,
8921 stmt, comp_vectype);
8922 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
8924 vec_cond_rhs
8925 = vect_get_vec_def_for_operand (cond_expr1,
8926 stmt, comp_vectype);
8927 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
8929 if (reduc_index == 1)
8930 vec_then_clause = reduc_def;
8931 else
8933 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8934 stmt);
8935 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
8937 if (reduc_index == 2)
8938 vec_else_clause = reduc_def;
8939 else
8941 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8942 stmt);
8943 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
8947 else
8949 vec_cond_lhs
8950 = vect_get_vec_def_for_stmt_copy (dts[0],
8951 vec_oprnds0.pop ());
8952 if (!masked)
8953 vec_cond_rhs
8954 = vect_get_vec_def_for_stmt_copy (dts[1],
8955 vec_oprnds1.pop ());
8957 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8958 vec_oprnds2.pop ());
8959 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8960 vec_oprnds3.pop ());
8963 if (!slp_node)
8965 vec_oprnds0.quick_push (vec_cond_lhs);
8966 if (!masked)
8967 vec_oprnds1.quick_push (vec_cond_rhs);
8968 vec_oprnds2.quick_push (vec_then_clause);
8969 vec_oprnds3.quick_push (vec_else_clause);
8972 /* Arguments are ready. Create the new vector stmt. */
8973 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8975 vec_then_clause = vec_oprnds2[i];
8976 vec_else_clause = vec_oprnds3[i];
8978 if (masked)
8979 vec_compare = vec_cond_lhs;
8980 else
8982 vec_cond_rhs = vec_oprnds1[i];
8983 if (bitop1 == NOP_EXPR)
8984 vec_compare = build2 (cond_code, vec_cmp_type,
8985 vec_cond_lhs, vec_cond_rhs);
8986 else
8988 new_temp = make_ssa_name (vec_cmp_type);
8989 if (bitop1 == BIT_NOT_EXPR)
8990 new_stmt = gimple_build_assign (new_temp, bitop1,
8991 vec_cond_rhs);
8992 else
8993 new_stmt
8994 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8995 vec_cond_rhs);
8996 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8997 if (bitop2 == NOP_EXPR)
8998 vec_compare = new_temp;
8999 else if (bitop2 == BIT_NOT_EXPR)
9001 /* Instead of doing ~x ? y : z do x ? z : y. */
9002 vec_compare = new_temp;
9003 std::swap (vec_then_clause, vec_else_clause);
9005 else
9007 vec_compare = make_ssa_name (vec_cmp_type);
9008 new_stmt
9009 = gimple_build_assign (vec_compare, bitop2,
9010 vec_cond_lhs, new_temp);
9011 vect_finish_stmt_generation (stmt, new_stmt, gsi);
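/* For an extract-last reduction the VEC_COND_EXPR is replaced by an
   IFN_FOLD_EXTRACT_LAST call, which yields the last element of
   VEC_THEN_CLAUSE selected by VEC_COMPARE, or ELSE_CLAUSE if no element
   is selected.  */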
9015 if (reduction_type == EXTRACT_LAST_REDUCTION)
9017 if (!is_gimple_val (vec_compare))
9019 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9020 new_stmt = gimple_build_assign (vec_compare_name,
9021 vec_compare);
9022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9023 vec_compare = vec_compare_name;
9025 gcc_assert (reduc_index == 2);
9026 new_stmt = gimple_build_call_internal
9027 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9028 vec_then_clause);
9029 gimple_call_set_lhs (new_stmt, scalar_dest);
9030 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9031 if (stmt == gsi_stmt (*gsi))
9032 vect_finish_replace_stmt (stmt, new_stmt);
9033 else
9035 /* In this case we're moving the definition to later in the
9036 block. That doesn't matter because the only uses of the
9037 lhs are in phi statements. */
9038 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9039 gsi_remove (&old_gsi, true);
9040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9043 else
9045 new_temp = make_ssa_name (vec_dest);
9046 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9047 vec_compare, vec_then_clause,
9048 vec_else_clause);
9049 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9051 if (slp_node)
9052 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9055 if (slp_node)
9056 continue;
9058 if (j == 0)
9059 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9060 else
9061 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9063 prev_stmt_info = vinfo_for_stmt (new_stmt);
9066 vec_oprnds0.release ();
9067 vec_oprnds1.release ();
9068 vec_oprnds2.release ();
9069 vec_oprnds3.release ();
9071 return true;
9074 /* vectorizable_comparison.
9076 Check if STMT is a comparison expression that can be vectorized.
9077 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9078 comparison, put it in VEC_STMT, and insert it at GSI.
9080 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9082 static bool
9083 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9084 gimple **vec_stmt, tree reduc_def,
9085 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9087 tree lhs, rhs1, rhs2;
9088 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9089 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9091 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9092 tree new_temp;
9093 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9094 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9095 int ndts = 2;
9096 poly_uint64 nunits;
9097 int ncopies;
9098 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9099 stmt_vec_info prev_stmt_info = NULL;
9100 int i, j;
9101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9102 vec<tree> vec_oprnds0 = vNULL;
9103 vec<tree> vec_oprnds1 = vNULL;
9104 tree mask_type;
9105 tree mask;
9107 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9108 return false;
9110 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9111 return false;
9113 mask_type = vectype;
9114 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9116 if (slp_node)
9117 ncopies = 1;
9118 else
9119 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9121 gcc_assert (ncopies >= 1);
9122 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9123 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9124 && reduc_def))
9125 return false;
9127 if (STMT_VINFO_LIVE_P (stmt_info))
9129 if (dump_enabled_p ())
9130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9131 "value used after loop.\n");
9132 return false;
9135 if (!is_gimple_assign (stmt))
9136 return false;
9138 code = gimple_assign_rhs_code (stmt);
9140 if (TREE_CODE_CLASS (code) != tcc_comparison)
9141 return false;
9143 rhs1 = gimple_assign_rhs1 (stmt);
9144 rhs2 = gimple_assign_rhs2 (stmt);
9146 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9147 return false;
9149 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9150 return false;
9152 if (vectype1 && vectype2
9153 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9154 TYPE_VECTOR_SUBPARTS (vectype2)))
9155 return false;
9157 vectype = vectype1 ? vectype1 : vectype2;
9159 /* Invariant comparison. */
9160 if (!vectype)
9162 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9163 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9164 return false;
9166 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9167 return false;
9169 /* Can't compare mask and non-mask types. */
9170 if (vectype1 && vectype2
9171 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9172 return false;
9174 /* Boolean values may have another representation in vectors
9175 and therefore we prefer bit operations over comparison for
9176 them (which also works for scalar masks). We store opcodes
9177 to use in bitop1 and bitop2. The statement is vectorized as
9178 BITOP2 (rhs1 BITOP1 rhs2) or
9179 rhs1 BITOP2 (BITOP1 rhs2)
9180 depending on bitop1 and bitop2 arity. */
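/* As a concrete illustration of the mapping implemented below, for boolean
   mask operands the comparisons reduce to:

     a >  b  ->  a & ~b     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
     a >= b  ->  a | ~b     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
     a <  b  ->  b & ~a     (as for >, with the operands swapped)
     a <= b  ->  b | ~a     (as for >=, with the operands swapped)
     a != b  ->  a ^ b      (bitop1 = BIT_XOR_EXPR)
     a == b  ->  ~(a ^ b)   (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)  */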
9181 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9183 if (code == GT_EXPR)
9185 bitop1 = BIT_NOT_EXPR;
9186 bitop2 = BIT_AND_EXPR;
9188 else if (code == GE_EXPR)
9190 bitop1 = BIT_NOT_EXPR;
9191 bitop2 = BIT_IOR_EXPR;
9193 else if (code == LT_EXPR)
9195 bitop1 = BIT_NOT_EXPR;
9196 bitop2 = BIT_AND_EXPR;
9197 std::swap (rhs1, rhs2);
9198 std::swap (dts[0], dts[1]);
9200 else if (code == LE_EXPR)
9202 bitop1 = BIT_NOT_EXPR;
9203 bitop2 = BIT_IOR_EXPR;
9204 std::swap (rhs1, rhs2);
9205 std::swap (dts[0], dts[1]);
9207 else
9209 bitop1 = BIT_XOR_EXPR;
9210 if (code == EQ_EXPR)
9211 bitop2 = BIT_NOT_EXPR;
9215 if (!vec_stmt)
9217 if (bitop1 == NOP_EXPR)
9219 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9220 return false;
9222 else
9224 machine_mode mode = TYPE_MODE (vectype);
9225 optab optab;
9227 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9228 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9229 return false;
9231 if (bitop2 != NOP_EXPR)
9233 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9234 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9235 return false;
9239 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9240 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9241 dts, ndts, slp_node, cost_vec);
9242 return true;
9245 /* Transform. */
9246 if (!slp_node)
9248 vec_oprnds0.create (1);
9249 vec_oprnds1.create (1);
9252 /* Handle def. */
9253 lhs = gimple_assign_lhs (stmt);
9254 mask = vect_create_destination_var (lhs, mask_type);
9256 /* Handle cmp expr. */
9257 for (j = 0; j < ncopies; j++)
9259 gassign *new_stmt = NULL;
9260 if (j == 0)
9262 if (slp_node)
9264 auto_vec<tree, 2> ops;
9265 auto_vec<vec<tree>, 2> vec_defs;
9267 ops.safe_push (rhs1);
9268 ops.safe_push (rhs2);
9269 vect_get_slp_defs (ops, slp_node, &vec_defs);
9270 vec_oprnds1 = vec_defs.pop ();
9271 vec_oprnds0 = vec_defs.pop ();
9273 else
9275 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9276 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9279 else
9281 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9282 vec_oprnds0.pop ());
9283 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9284 vec_oprnds1.pop ());
9287 if (!slp_node)
9289 vec_oprnds0.quick_push (vec_rhs1);
9290 vec_oprnds1.quick_push (vec_rhs2);
9293 /* Arguments are ready. Create the new vector stmt. */
9294 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9296 vec_rhs2 = vec_oprnds1[i];
9298 new_temp = make_ssa_name (mask);
9299 if (bitop1 == NOP_EXPR)
9301 new_stmt = gimple_build_assign (new_temp, code,
9302 vec_rhs1, vec_rhs2);
9303 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9305 else
9307 if (bitop1 == BIT_NOT_EXPR)
9308 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9309 else
9310 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9311 vec_rhs2);
9312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9313 if (bitop2 != NOP_EXPR)
9315 tree res = make_ssa_name (mask);
9316 if (bitop2 == BIT_NOT_EXPR)
9317 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9318 else
9319 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9320 new_temp);
9321 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9324 if (slp_node)
9325 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9328 if (slp_node)
9329 continue;
9331 if (j == 0)
9332 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9333 else
9334 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9336 prev_stmt_info = vinfo_for_stmt (new_stmt);
9339 vec_oprnds0.release ();
9340 vec_oprnds1.release ();
9342 return true;
9345 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9346 can handle all live statements in the node. Otherwise return true
9347 if STMT is not live or if vectorizable_live_operation can handle it.
9348 GSI and VEC_STMT are as for vectorizable_live_operation. */
9350 static bool
9351 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9352 slp_tree slp_node, gimple **vec_stmt,
9353 stmt_vector_for_cost *cost_vec)
9355 if (slp_node)
9357 gimple *slp_stmt;
9358 unsigned int i;
9359 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9361 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9362 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9363 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9364 vec_stmt, cost_vec))
9365 return false;
9368 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9369 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9370 cost_vec))
9371 return false;
9373 return true;
9376 /* Make sure the statement is vectorizable. */
9378 bool
9379 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9380 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
9382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9383 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9384 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9385 bool ok;
9386 gimple *pattern_stmt;
9387 gimple_seq pattern_def_seq;
9389 if (dump_enabled_p ())
9391 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9395 if (gimple_has_volatile_ops (stmt))
9397 if (dump_enabled_p ())
9398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9399 "not vectorized: stmt has volatile operands\n");
9401 return false;
9404 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9405 && node == NULL
9406 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9408 gimple_stmt_iterator si;
9410 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9412 gimple *pattern_def_stmt = gsi_stmt (si);
9413 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9414 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9416 /* Analyze def stmt of STMT if it's a pattern stmt. */
9417 if (dump_enabled_p ())
9419 dump_printf_loc (MSG_NOTE, vect_location,
9420 "==> examining pattern def statement: ");
9421 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9424 if (!vect_analyze_stmt (pattern_def_stmt,
9425 need_to_vectorize, node, node_instance,
9426 cost_vec))
9427 return false;
9432 /* Skip stmts that do not need to be vectorized. In loops this is expected
9433 to include:
9434 - the COND_EXPR which is the loop exit condition
9435 - any LABEL_EXPRs in the loop
9436 - computations that are used only for array indexing or loop control.
9437 In basic blocks we only analyze statements that are part of some SLP
9438 instance; therefore all the statements are relevant.
9440 The pattern statement needs to be analyzed instead of the original statement
9441 if the original statement is not relevant. Otherwise, we analyze both
9442 statements. In basic blocks we are called from some SLP instance
9443 traversal, so we don't analyze pattern stmts separately; the pattern
9444 stmts will already be part of the SLP instance. */
9446 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9447 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9448 && !STMT_VINFO_LIVE_P (stmt_info))
9450 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9451 && pattern_stmt
9452 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9453 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9455 /* Analyze PATTERN_STMT instead of the original stmt. */
9456 stmt = pattern_stmt;
9457 stmt_info = vinfo_for_stmt (pattern_stmt);
9458 if (dump_enabled_p ())
9460 dump_printf_loc (MSG_NOTE, vect_location,
9461 "==> examining pattern statement: ");
9462 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9465 else
9467 if (dump_enabled_p ())
9468 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9470 return true;
9473 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9474 && node == NULL
9475 && pattern_stmt
9476 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9477 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9479 /* Analyze PATTERN_STMT too. */
9480 if (dump_enabled_p ())
9482 dump_printf_loc (MSG_NOTE, vect_location,
9483 "==> examining pattern statement: ");
9484 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9487 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9488 node_instance, cost_vec))
9489 return false;
9492 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9494 case vect_internal_def:
9495 break;
9497 case vect_reduction_def:
9498 case vect_nested_cycle:
9499 gcc_assert (!bb_vinfo
9500 && (relevance == vect_used_in_outer
9501 || relevance == vect_used_in_outer_by_reduction
9502 || relevance == vect_used_by_reduction
9503 || relevance == vect_unused_in_scope
9504 || relevance == vect_used_only_live));
9505 break;
9507 case vect_induction_def:
9508 gcc_assert (!bb_vinfo);
9509 break;
9511 case vect_constant_def:
9512 case vect_external_def:
9513 case vect_unknown_def_type:
9514 default:
9515 gcc_unreachable ();
9518 if (STMT_VINFO_RELEVANT_P (stmt_info))
9520 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9521 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9522 || (is_gimple_call (stmt)
9523 && gimple_call_lhs (stmt) == NULL_TREE));
9524 *need_to_vectorize = true;
9527 if (PURE_SLP_STMT (stmt_info) && !node)
9529 dump_printf_loc (MSG_NOTE, vect_location,
9530 "handled only by SLP analysis\n");
9531 return true;
9534 ok = true;
9535 if (!bb_vinfo
9536 && (STMT_VINFO_RELEVANT_P (stmt_info)
9537 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9538 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9539 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9540 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9541 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9542 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9543 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9544 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9545 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9546 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9547 cost_vec)
9548 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9549 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9550 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
9551 else
9553 if (bb_vinfo)
9554 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9555 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9556 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9557 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9558 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9559 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9560 cost_vec)
9561 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9562 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9563 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9564 cost_vec)
9565 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9566 cost_vec));
9569 if (!ok)
9571 if (dump_enabled_p ())
9573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9574 "not vectorized: relevant stmt not ");
9575 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9576 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9579 return false;
9582 /* Stmts that are (also) "live" (i.e. used outside the loop)
9583 need extra handling, except for vectorizable reductions. */
9584 if (!bb_vinfo
9585 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9586 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
9588 if (dump_enabled_p ())
9590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9591 "not vectorized: live stmt not supported: ");
9592 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9595 return false;
9598 return true;
9602 /* Function vect_transform_stmt.
9604 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9606 bool
9607 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9608 bool *grouped_store, slp_tree slp_node,
9609 slp_instance slp_node_instance)
9611 bool is_store = false;
9612 gimple *vec_stmt = NULL;
9613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9614 bool done;
9616 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9617 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9619 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9620 && nested_in_vect_loop_p
9621 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9622 stmt));
9624 switch (STMT_VINFO_TYPE (stmt_info))
9626 case type_demotion_vec_info_type:
9627 case type_promotion_vec_info_type:
9628 case type_conversion_vec_info_type:
9629 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
9630 gcc_assert (done);
9631 break;
9633 case induc_vec_info_type:
9634 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
9635 gcc_assert (done);
9636 break;
9638 case shift_vec_info_type:
9639 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9640 gcc_assert (done);
9641 break;
9643 case op_vec_info_type:
9644 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
9645 gcc_assert (done);
9646 break;
9648 case assignment_vec_info_type:
9649 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
9650 gcc_assert (done);
9651 break;
9653 case load_vec_info_type:
9654 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9655 slp_node_instance, NULL);
9656 gcc_assert (done);
9657 break;
9659 case store_vec_info_type:
9660 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
9661 gcc_assert (done);
9662 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9664 /* In case of interleaving, the whole chain is vectorized when the
9665 last store in the chain is reached. Store stmts before the last
9666 one are skipped, and their vec_stmt_info shouldn't be freed
9667 in the meantime. */
9668 *grouped_store = true;
9669 stmt_vec_info group_info
9670 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9671 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9672 is_store = true;
9674 else
9675 is_store = true;
9676 break;
9678 case condition_vec_info_type:
9679 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
9680 gcc_assert (done);
9681 break;
9683 case comparison_vec_info_type:
9684 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
9685 gcc_assert (done);
9686 break;
9688 case call_vec_info_type:
9689 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9690 stmt = gsi_stmt (*gsi);
9691 break;
9693 case call_simd_clone_vec_info_type:
9694 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9695 stmt = gsi_stmt (*gsi);
9696 break;
9698 case reduc_vec_info_type:
9699 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9700 slp_node_instance, NULL);
9701 gcc_assert (done);
9702 break;
9704 default:
9705 if (!STMT_VINFO_LIVE_P (stmt_info))
9707 if (dump_enabled_p ())
9708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9709 "stmt not supported.\n");
9710 gcc_unreachable ();
9714 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9715 This would break hybrid SLP vectorization. */
9716 if (slp_node)
9717 gcc_assert (!vec_stmt
9718 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9720 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9721 is being vectorized, but outside the immediately enclosing loop. */
9722 if (vec_stmt
9723 && nested_p
9724 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9725 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9726 || STMT_VINFO_RELEVANT (stmt_info) ==
9727 vect_used_in_outer_by_reduction))
9729 struct loop *innerloop = LOOP_VINFO_LOOP (
9730 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9731 imm_use_iterator imm_iter;
9732 use_operand_p use_p;
9733 tree scalar_dest;
9734 gimple *exit_phi;
9736 if (dump_enabled_p ())
9737 dump_printf_loc (MSG_NOTE, vect_location,
9738 "Record the vdef for outer-loop vectorization.\n");
9740 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9741 (to be used when vectorizing outer-loop stmts that use the DEF of
9742 STMT). */
9743 if (gimple_code (stmt) == GIMPLE_PHI)
9744 scalar_dest = PHI_RESULT (stmt);
9745 else
9746 scalar_dest = gimple_assign_lhs (stmt);
9748 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9750 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9752 exit_phi = USE_STMT (use_p);
9753 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9758 /* Handle stmts whose DEF is used outside the loop-nest that is
9759 being vectorized. */
9760 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9762 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
9763 gcc_assert (done);
9766 if (vec_stmt)
9767 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9769 return is_store;
9773 /* Remove a group of stores (for SLP or interleaving), free their
9774 stmt_vec_info. */
9776 void
9777 vect_remove_stores (gimple *first_stmt)
9779 gimple *next = first_stmt;
9780 gimple *tmp;
9781 gimple_stmt_iterator next_si;
9783 while (next)
9785 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9787 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
9788 if (is_pattern_stmt_p (stmt_info))
9789 next = STMT_VINFO_RELATED_STMT (stmt_info);
9790 /* Free the attached stmt_vec_info and remove the stmt. */
9791 next_si = gsi_for_stmt (next);
9792 unlink_stmt_vdef (next);
9793 gsi_remove (&next_si, true);
9794 release_defs (next);
9795 free_stmt_vec_info (next);
9796 next = tmp;
9801 /* Function new_stmt_vec_info.
9803 Create and initialize a new stmt_vec_info struct for STMT. */
9805 stmt_vec_info
9806 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9808 stmt_vec_info res;
9809 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9811 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9812 STMT_VINFO_STMT (res) = stmt;
9813 res->vinfo = vinfo;
9814 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9815 STMT_VINFO_LIVE_P (res) = false;
9816 STMT_VINFO_VECTYPE (res) = NULL;
9817 STMT_VINFO_VEC_STMT (res) = NULL;
9818 STMT_VINFO_VECTORIZABLE (res) = true;
9819 STMT_VINFO_IN_PATTERN_P (res) = false;
9820 STMT_VINFO_RELATED_STMT (res) = NULL;
9821 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9822 STMT_VINFO_DATA_REF (res) = NULL;
9823 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9824 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9826 if (gimple_code (stmt) == GIMPLE_PHI
9827 && is_loop_header_bb_p (gimple_bb (stmt)))
9828 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9829 else
9830 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9832 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9833 STMT_SLP_TYPE (res) = loop_vect;
9834 STMT_VINFO_NUM_SLP_USES (res) = 0;
9836 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9837 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9838 res->size = 0; /* GROUP_SIZE */
9839 res->store_count = 0; /* GROUP_STORE_COUNT */
9840 res->gap = 0; /* GROUP_GAP */
9841 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
9843 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9844 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9846 return res;
9850 /* Set the current stmt_vec_info vector to V. */
9852 void
9853 set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
9855 stmt_vec_info_vec = v;
9858 /* Free the stmt_vec_info entries in V and release V. */
9860 void
9861 free_stmt_vec_infos (vec<stmt_vec_info> *v)
9863 unsigned int i;
9864 stmt_vec_info info;
9865 FOR_EACH_VEC_ELT (*v, i, info)
9866 if (info != NULL)
9867 free_stmt_vec_info (STMT_VINFO_STMT (info));
9868 if (v == stmt_vec_info_vec)
9869 stmt_vec_info_vec = NULL;
9870 v->release ();
9874 /* Free stmt vectorization related info. */
9876 void
9877 free_stmt_vec_info (gimple *stmt)
9879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9881 if (!stmt_info)
9882 return;
9884 /* Check if this statement has a related "pattern stmt"
9885 (introduced by the vectorizer during the pattern recognition
9886 pass). Free the pattern's stmt_vec_info and the def stmts'
9887 stmt_vec_infos too. */
9888 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9890 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9891 for (gimple_stmt_iterator si = gsi_start (seq);
9892 !gsi_end_p (si); gsi_next (&si))
9894 gimple *seq_stmt = gsi_stmt (si);
9895 gimple_set_bb (seq_stmt, NULL);
9896 tree lhs = gimple_get_lhs (seq_stmt);
9897 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9898 release_ssa_name (lhs);
9899 free_stmt_vec_info (seq_stmt);
9901 stmt_vec_info patt_info
9902 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9903 if (patt_info)
9905 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9906 gimple_set_bb (patt_stmt, NULL);
9907 tree lhs = gimple_get_lhs (patt_stmt);
9908 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9909 release_ssa_name (lhs);
9910 free_stmt_vec_info (patt_stmt);
9914 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9916 set_vinfo_for_stmt (stmt, NULL);
9917 free (stmt_info);
9921 /* Function get_vectype_for_scalar_type_and_size.
9923 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9924 by the target. */
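/* A rough example of the intended behaviour (target-dependent, so only a
   sketch): with SCALAR_TYPE == int and SIZE == 16 bytes, the function looks
   for a 4-element integer vector mode and, if the target provides one,
   returns the corresponding 4 x int vector type; otherwise it returns
   NULL_TREE.  With SIZE == 0 the target's preferred SIMD mode is used
   instead.  */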
9926 tree
9927 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9929 tree orig_scalar_type = scalar_type;
9930 scalar_mode inner_mode;
9931 machine_mode simd_mode;
9932 poly_uint64 nunits;
9933 tree vectype;
9935 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9936 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9937 return NULL_TREE;
9939 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9941 /* For vector types of elements whose mode precision doesn't
9942 match their type's precision, we use an element type of mode
9943 precision. The vectorization routines will have to make sure
9944 they support the proper result truncation/extension.
9945 We also make sure to build vector types with INTEGER_TYPE
9946 component type only. */
9947 if (INTEGRAL_TYPE_P (scalar_type)
9948 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9949 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9950 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9951 TYPE_UNSIGNED (scalar_type));
9953 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9954 When the component mode passes the above test, simply use a type
9955 corresponding to that mode. The theory is that any use that
9956 would cause problems with this will disable vectorization anyway. */
9957 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9958 && !INTEGRAL_TYPE_P (scalar_type))
9959 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9961 /* We can't build a vector type of elements with alignment bigger than
9962 their size. */
9963 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9964 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9965 TYPE_UNSIGNED (scalar_type));
9967 /* If we fell back to using the mode, fail if there was
9968 no scalar type for it. */
9969 if (scalar_type == NULL_TREE)
9970 return NULL_TREE;
9972 /* If no size was supplied, use the mode the target prefers. Otherwise
9973 look up a vector mode of the specified size. */
9974 if (known_eq (size, 0U))
9975 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9976 else if (!multiple_p (size, nbytes, &nunits)
9977 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9978 return NULL_TREE;
9979 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9980 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9981 return NULL_TREE;
9983 vectype = build_vector_type (scalar_type, nunits);
9985 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9986 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9987 return NULL_TREE;
9989 /* Re-attach the address-space qualifier if we canonicalized the scalar
9990 type. */
9991 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9992 return build_qualified_type
9993 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9995 return vectype;
9998 poly_uint64 current_vector_size;
10000 /* Function get_vectype_for_scalar_type.
10002 Returns the vector type corresponding to SCALAR_TYPE as supported
10003 by the target. */
10005 tree
10006 get_vectype_for_scalar_type (tree scalar_type)
10008 tree vectype;
10009 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10010 current_vector_size);
10011 if (vectype
10012 && known_eq (current_vector_size, 0U))
10013 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10014 return vectype;
10017 /* Function get_mask_type_for_scalar_type.
10019 Returns the mask type corresponding to a result of comparison
10020 of vectors of the specified SCALAR_TYPE, as supported by the target. */
10022 tree
10023 get_mask_type_for_scalar_type (tree scalar_type)
10025 tree vectype = get_vectype_for_scalar_type (scalar_type);
10027 if (!vectype)
10028 return NULL;
10030 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10031 current_vector_size);
10034 /* Function get_same_sized_vectype
10036 Returns a vector type corresponding to SCALAR_TYPE with the same
10037 size as VECTOR_TYPE, if supported by the target. */
10039 tree
10040 get_same_sized_vectype (tree scalar_type, tree vector_type)
10042 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10043 return build_same_sized_truth_vector_type (vector_type);
10045 return get_vectype_for_scalar_type_and_size
10046 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10049 /* Function vect_is_simple_use.
10051 Input:
10052 VINFO - the vect info of the loop or basic block that is being vectorized.
10053 OPERAND - operand in the loop or bb.
10054 Output:
10055 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
10056 DT - the type of definition
10058 Returns whether a stmt with OPERAND can be vectorized.
10059 For loops, supportable operands are constants, loop invariants, and operands
10060 that are defined by the current iteration of the loop. Unsupportable
10061 operands are those that are defined by a previous iteration of the loop (as
10062 is the case in reduction/induction computations).
10063 For basic blocks, supportable operands are constants and bb invariants.
10064 For now, operands defined outside the basic block are not supported. */
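/* A minimal usage sketch (the local variable names are illustrative; the
   call pattern mirrors the uses elsewhere in this file):

     enum vect_def_type dt;
     gimple *def_stmt;
     if (!vect_is_simple_use (op, stmt_info->vinfo, &dt, &def_stmt))
       return false;

   after which the caller dispatches on DT (vect_constant_def,
   vect_external_def, vect_internal_def, ...).  */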
10066 bool
10067 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10068 gimple **def_stmt_out)
10070 if (def_stmt_out)
10071 *def_stmt_out = NULL;
10072 *dt = vect_unknown_def_type;
10074 if (dump_enabled_p ())
10076 dump_printf_loc (MSG_NOTE, vect_location,
10077 "vect_is_simple_use: operand ");
10078 if (TREE_CODE (operand) == SSA_NAME
10079 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10080 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10081 else
10082 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10085 if (CONSTANT_CLASS_P (operand))
10086 *dt = vect_constant_def;
10087 else if (is_gimple_min_invariant (operand))
10088 *dt = vect_external_def;
10089 else if (TREE_CODE (operand) != SSA_NAME)
10090 *dt = vect_unknown_def_type;
10091 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10092 *dt = vect_external_def;
10093 else
10095 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10096 if (! vect_stmt_in_region_p (vinfo, def_stmt))
10097 *dt = vect_external_def;
10098 else
10100 stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
10101 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10103 def_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10104 stmt_vinfo = vinfo_for_stmt (def_stmt);
10106 switch (gimple_code (def_stmt))
10108 case GIMPLE_PHI:
10109 case GIMPLE_ASSIGN:
10110 case GIMPLE_CALL:
10111 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10112 break;
10113 default:
10114 *dt = vect_unknown_def_type;
10115 break;
10118 if (def_stmt_out)
10119 *def_stmt_out = def_stmt;
10122 if (dump_enabled_p ())
10124 dump_printf (MSG_NOTE, ", type of def: ");
10125 switch (*dt)
10127 case vect_uninitialized_def:
10128 dump_printf (MSG_NOTE, "uninitialized\n");
10129 break;
10130 case vect_constant_def:
10131 dump_printf (MSG_NOTE, "constant\n");
10132 break;
10133 case vect_external_def:
10134 dump_printf (MSG_NOTE, "external\n");
10135 break;
10136 case vect_internal_def:
10137 dump_printf (MSG_NOTE, "internal\n");
10138 break;
10139 case vect_induction_def:
10140 dump_printf (MSG_NOTE, "induction\n");
10141 break;
10142 case vect_reduction_def:
10143 dump_printf (MSG_NOTE, "reduction\n");
10144 break;
10145 case vect_double_reduction_def:
10146 dump_printf (MSG_NOTE, "double reduction\n");
10147 break;
10148 case vect_nested_cycle:
10149 dump_printf (MSG_NOTE, "nested cycle\n");
10150 break;
10151 case vect_unknown_def_type:
10152 dump_printf (MSG_NOTE, "unknown\n");
10153 break;
10157 if (*dt == vect_unknown_def_type)
10159 if (dump_enabled_p ())
10160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10161 "Unsupported pattern.\n");
10162 return false;
10165 return true;
10168 /* Function vect_is_simple_use.
10170 Same as vect_is_simple_use but also determines the vector operand
10171 type of OPERAND and stores it to *VECTYPE. If the definition of
10172 OPERAND is vect_uninitialized_def, vect_constant_def or
10173 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
10174 is responsible for computing the best-suited vector type for the
10175 scalar operand. */
10177 bool
10178 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10179 tree *vectype, gimple **def_stmt_out)
10181 gimple *def_stmt;
10182 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt))
10183 return false;
10185 if (def_stmt_out)
10186 *def_stmt_out = def_stmt;
10188 /* Now get a vector type if the def is internal, otherwise supply
10189 NULL_TREE and leave it up to the caller to figure out a proper
10190 type for the use stmt. */
10191 if (*dt == vect_internal_def
10192 || *dt == vect_induction_def
10193 || *dt == vect_reduction_def
10194 || *dt == vect_double_reduction_def
10195 || *dt == vect_nested_cycle)
10197 stmt_vec_info stmt_info = vinfo_for_stmt (def_stmt);
10198 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10199 gcc_assert (*vectype != NULL_TREE);
10200 if (dump_enabled_p ())
10202 dump_printf_loc (MSG_NOTE, vect_location,
10203 "vect_is_simple_use: vectype ");
10204 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10205 dump_printf (MSG_NOTE, "\n");
10208 else if (*dt == vect_uninitialized_def
10209 || *dt == vect_constant_def
10210 || *dt == vect_external_def)
10211 *vectype = NULL_TREE;
10212 else
10213 gcc_unreachable ();
10215 return true;
10219 /* Function supportable_widening_operation
10221 Check whether an operation represented by the code CODE is a
10222 widening operation that is supported by the target platform in
10223 vector form (i.e., when operating on arguments of type VECTYPE_IN
10224 producing a result of type VECTYPE_OUT).
10226 Widening operations we currently support are NOP (CONVERT), FLOAT,
10227 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10228 are supported by the target platform either directly (via vector
10229 tree-codes), or via target builtins.
10231 Output:
10232 - CODE1 and CODE2 are codes of vector operations to be used when
10233 vectorizing the operation, if available.
10234 - MULTI_STEP_CVT determines the number of required intermediate steps in
10235 case of multi-step conversion (like char->short->int - in that case
10236 MULTI_STEP_CVT will be 1).
10237 - INTERM_TYPES contains the intermediate type required to perform the
10238 widening operation (short in the above example). */
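/* For instance (a sketch; actual support is target-dependent), widening a
   vector of chars to ints via CASE_CONVERT would yield
   CODE1 = VEC_UNPACK_LO_EXPR and CODE2 = VEC_UNPACK_HI_EXPR (swapped on
   big-endian targets, as done below), with INTERM_TYPES holding the short
   vector type and MULTI_STEP_CVT == 1 if the target can only unpack one
   step at a time.  */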
10240 bool
10241 supportable_widening_operation (enum tree_code code, gimple *stmt,
10242 tree vectype_out, tree vectype_in,
10243 enum tree_code *code1, enum tree_code *code2,
10244 int *multi_step_cvt,
10245 vec<tree> *interm_types)
10247 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10248 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10249 struct loop *vect_loop = NULL;
10250 machine_mode vec_mode;
10251 enum insn_code icode1, icode2;
10252 optab optab1, optab2;
10253 tree vectype = vectype_in;
10254 tree wide_vectype = vectype_out;
10255 enum tree_code c1, c2;
10256 int i;
10257 tree prev_type, intermediate_type;
10258 machine_mode intermediate_mode, prev_mode;
10259 optab optab3, optab4;
10261 *multi_step_cvt = 0;
10262 if (loop_info)
10263 vect_loop = LOOP_VINFO_LOOP (loop_info);
10265 switch (code)
10267 case WIDEN_MULT_EXPR:
10268 /* The result of a vectorized widening operation usually requires
10269 two vectors (because the widened results do not fit into one vector).
10270 The generated vector results would normally be expected to be
10271 generated in the same order as in the original scalar computation,
10272 i.e. if 8 results are generated in each vector iteration, they are
10273 to be organized as follows:
10274 vect1: [res1,res2,res3,res4],
10275 vect2: [res5,res6,res7,res8].
10277 However, in the special case that the result of the widening
10278 operation is used in a reduction computation only, the order doesn't
10279 matter (because when vectorizing a reduction we change the order of
10280 the computation). Some targets can take advantage of this and
10281 generate more efficient code. For example, targets like Altivec,
10282 that support widen_mult using a sequence of {mult_even,mult_odd}
10283 generate the following vectors:
10284 vect1: [res1,res3,res5,res7],
10285 vect2: [res2,res4,res6,res8].
10287 When vectorizing outer-loops, we execute the inner-loop sequentially
10288 (each vectorized inner-loop iteration contributes to VF outer-loop
10289 iterations in parallel). We therefore don't allow changing the
10290 order of the computation in the inner-loop during outer-loop
10291 vectorization. */
10292 /* TODO: Another case in which order doesn't *really* matter is when we
10293 widen and then contract again, e.g. (short)((int)x * y >> 8).
10294 Normally, pack_trunc performs an even/odd permute, whereas the
10295 repack from an even/odd expansion would be an interleave, which
10296 would be significantly simpler for e.g. AVX2. */
10297 /* In any case, in order to avoid duplicating the code below, recurse
10298 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10299 are properly set up for the caller. If we fail, we'll continue with
10300 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10301 if (vect_loop
10302 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10303 && !nested_in_vect_loop_p (vect_loop, stmt)
10304 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10305 stmt, vectype_out, vectype_in,
10306 code1, code2, multi_step_cvt,
10307 interm_types))
10309 /* Elements in a vector with the vect_used_by_reduction property cannot
10310 be reordered if the use chain with this property does not have the
10311 same operation. One such example is s += a * b, where elements
10312 in a and b cannot be reordered. Here we check if the vector defined
10313 by STMT is only directly used in the reduction statement. */
10314 tree lhs = gimple_assign_lhs (stmt);
10315 use_operand_p dummy;
10316 gimple *use_stmt;
10317 stmt_vec_info use_stmt_info = NULL;
10318 if (single_imm_use (lhs, &dummy, &use_stmt)
10319 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10320 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10321 return true;
10323 c1 = VEC_WIDEN_MULT_LO_EXPR;
10324 c2 = VEC_WIDEN_MULT_HI_EXPR;
10325 break;
10327 case DOT_PROD_EXPR:
10328 c1 = DOT_PROD_EXPR;
10329 c2 = DOT_PROD_EXPR;
10330 break;
10332 case SAD_EXPR:
10333 c1 = SAD_EXPR;
10334 c2 = SAD_EXPR;
10335 break;
10337 case VEC_WIDEN_MULT_EVEN_EXPR:
10338 /* Support the recursion induced just above. */
10339 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10340 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10341 break;
10343 case WIDEN_LSHIFT_EXPR:
10344 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10345 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10346 break;
10348 CASE_CONVERT:
10349 c1 = VEC_UNPACK_LO_EXPR;
10350 c2 = VEC_UNPACK_HI_EXPR;
10351 break;
10353 case FLOAT_EXPR:
10354 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10355 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10356 break;
10358 case FIX_TRUNC_EXPR:
10359 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10360 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10361 break;
10363 default:
10364 gcc_unreachable ();
10367 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10368 std::swap (c1, c2);
10370 if (code == FIX_TRUNC_EXPR)
10372 /* The signedness is determined from the output operand. */
10373 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10374 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10376 else
10378 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10379 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10382 if (!optab1 || !optab2)
10383 return false;
10385 vec_mode = TYPE_MODE (vectype);
10386 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10387 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10388 return false;
10390 *code1 = c1;
10391 *code2 = c2;
10393 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10394 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10395 /* For scalar masks we may have different boolean
10396 vector types having the same QImode. Thus we
10397 add an additional check on the number of elements. */
10398 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10399 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10400 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10402 /* Check if it's a multi-step conversion that can be done using intermediate
10403 types. */
10405 prev_type = vectype;
10406 prev_mode = vec_mode;
10408 if (!CONVERT_EXPR_CODE_P (code))
10409 return false;
10411 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10412 intermediate steps in the promotion sequence. We try
10413 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10414 not. */
10415 interm_types->create (MAX_INTERM_CVT_STEPS);
10416 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10418 intermediate_mode = insn_data[icode1].operand[0].mode;
10419 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10421 intermediate_type = vect_halve_mask_nunits (prev_type);
10422 if (intermediate_mode != TYPE_MODE (intermediate_type))
10423 return false;
10425 else
10426 intermediate_type
10427 = lang_hooks.types.type_for_mode (intermediate_mode,
10428 TYPE_UNSIGNED (prev_type));
10430 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10431 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10433 if (!optab3 || !optab4
10434 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10435 || insn_data[icode1].operand[0].mode != intermediate_mode
10436 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10437 || insn_data[icode2].operand[0].mode != intermediate_mode
10438 || ((icode1 = optab_handler (optab3, intermediate_mode))
10439 == CODE_FOR_nothing)
10440 || ((icode2 = optab_handler (optab4, intermediate_mode))
10441 == CODE_FOR_nothing))
10442 break;
10444 interm_types->quick_push (intermediate_type);
10445 (*multi_step_cvt)++;
10447 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10448 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10449 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10450 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10451 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10453 prev_type = intermediate_type;
10454 prev_mode = intermediate_mode;
10457 interm_types->release ();
10458 return false;
10462 /* Function supportable_narrowing_operation
10464 Check whether an operation represented by the code CODE is a
10465 narrowing operation that is supported by the target platform in
10466 vector form (i.e., when operating on arguments of type VECTYPE_IN
10467 and producing a result of type VECTYPE_OUT).
10469 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10470 and FLOAT. This function checks if these operations are supported by
10471 the target platform directly via vector tree-codes.
10473 Output:
10474 - CODE1 is the code of a vector operation to be used when
10475 vectorizing the operation, if available.
10476 - MULTI_STEP_CVT determines the number of required intermediate steps in
10477 case of multi-step conversion (like int->short->char - in that case
10478 MULTI_STEP_CVT will be 1).
10479 - INTERM_TYPES contains the intermediate type required to perform the
10480 narrowing operation (short in the above example). */
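/* For instance (a sketch; actual support is target-dependent), narrowing a
   vector of ints to chars via CASE_CONVERT yields
   CODE1 = VEC_PACK_TRUNC_EXPR; if the target cannot pack int directly to
   char, INTERM_TYPES would hold the short vector type and MULTI_STEP_CVT
   would be 1, matching the int->short->char example above.  */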
10482 bool
10483 supportable_narrowing_operation (enum tree_code code,
10484 tree vectype_out, tree vectype_in,
10485 enum tree_code *code1, int *multi_step_cvt,
10486 vec<tree> *interm_types)
10488 machine_mode vec_mode;
10489 enum insn_code icode1;
10490 optab optab1, interm_optab;
10491 tree vectype = vectype_in;
10492 tree narrow_vectype = vectype_out;
10493 enum tree_code c1;
10494 tree intermediate_type, prev_type;
10495 machine_mode intermediate_mode, prev_mode;
10496 int i;
10497 bool uns;
10499 *multi_step_cvt = 0;
10500 switch (code)
10502 CASE_CONVERT:
10503 c1 = VEC_PACK_TRUNC_EXPR;
10504 break;
10506 case FIX_TRUNC_EXPR:
10507 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10508 break;
10510 case FLOAT_EXPR:
10511 c1 = VEC_PACK_FLOAT_EXPR;
10512 break;
10514 default:
10515 gcc_unreachable ();
10518 if (code == FIX_TRUNC_EXPR)
10519 /* The signedness is determined from the output operand. */
10520 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10521 else
10522 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10524 if (!optab1)
10525 return false;
10527 vec_mode = TYPE_MODE (vectype);
10528 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10529 return false;
10531 *code1 = c1;
10533 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10534 /* For scalar masks we may have different boolean
10535 vector types having the same QImode. Thus we
10536 add an additional check on the number of elements. */
10537 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10538 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10539 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10541 if (code == FLOAT_EXPR)
10542 return false;
10544 /* Check if it's a multi-step conversion that can be done using intermediate
10545 types. */
10546 prev_mode = vec_mode;
10547 prev_type = vectype;
10548 if (code == FIX_TRUNC_EXPR)
10549 uns = TYPE_UNSIGNED (vectype_out);
10550 else
10551 uns = TYPE_UNSIGNED (vectype);
10553 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10554 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10555 costly than signed. */
10556 if (code == FIX_TRUNC_EXPR && uns)
10558 enum insn_code icode2;
10560 intermediate_type
10561 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10562 interm_optab
10563 = optab_for_tree_code (c1, intermediate_type, optab_default);
10564 if (interm_optab != unknown_optab
10565 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10566 && insn_data[icode1].operand[0].mode
10567 == insn_data[icode2].operand[0].mode)
10569 uns = false;
10570 optab1 = interm_optab;
10571 icode1 = icode2;
10575 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10576 intermediate steps in the narrowing sequence. We try
10577 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10578 interm_types->create (MAX_INTERM_CVT_STEPS);
10579 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10581 intermediate_mode = insn_data[icode1].operand[0].mode;
10582 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10584 intermediate_type = vect_double_mask_nunits (prev_type);
10585 if (intermediate_mode != TYPE_MODE (intermediate_type))
10586 return false;
10588 else
10589 intermediate_type
10590 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10591 interm_optab
10592 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10593 optab_default);
10594 if (!interm_optab
10595 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10596 || insn_data[icode1].operand[0].mode != intermediate_mode
10597 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10598 == CODE_FOR_nothing))
10599 break;
10601 interm_types->quick_push (intermediate_type);
10602 (*multi_step_cvt)++;
10604 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10605 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10606 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10607 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10609 prev_mode = intermediate_mode;
10610 prev_type = intermediate_type;
10611 optab1 = interm_optab;
10614 interm_types->release ();
10615 return false;
10618 /* Generate and return a statement that sets vector mask MASK such that
10619 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
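/* A worked example of the semantics: with START_INDEX = 6, END_INDEX = 9
   and an 8-element MASK, element I is active iff I + 6 < 9, giving
   MASK = { 1, 1, 1, 0, 0, 0, 0, 0 }.  This is the shape of mask typically
   used for the final, partial iteration of a fully-masked loop.  */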
10621 gcall *
10622 vect_gen_while (tree mask, tree start_index, tree end_index)
10624 tree cmp_type = TREE_TYPE (start_index);
10625 tree mask_type = TREE_TYPE (mask);
10626 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10627 cmp_type, mask_type,
10628 OPTIMIZE_FOR_SPEED));
10629 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10630 start_index, end_index,
10631 build_zero_cst (mask_type));
10632 gimple_call_set_lhs (call, mask);
10633 return call;
10636 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10637 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
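/* Continuing the example above (a sketch): with START_INDEX = 6,
   END_INDEX = 9 and an 8-element mask type, the returned mask is the
   complement { 0, 0, 0, 1, 1, 1, 1, 1 }.  */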
10639 tree
10640 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10641 tree end_index)
10643 tree tmp = make_ssa_name (mask_type);
10644 gcall *call = vect_gen_while (tmp, start_index, end_index);
10645 gimple_seq_add_stmt (seq, call);
10646 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10649 /* Try to compute the vector types required to vectorize STMT_INFO,
10650 returning true on success and false if vectorization isn't possible.
10652 On success:
10654 - Set *STMT_VECTYPE_OUT to:
10655 - NULL_TREE if the statement doesn't need to be vectorized;
10656 - boolean_type_node if the statement is a boolean operation whose
10657 vector type can only be determined once all the other vector types
10658 are known; and
10659 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10661 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10662 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10663 statement does not help to determine the overall number of units. */
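/* As an illustration (a sketch): for a comparison such as

     _1 = a_2 < b_3;

   where a_2 and b_3 are ints, *STMT_VECTYPE_OUT is set to boolean_type_node
   (the mask type is chosen later, once all the other vector types are
   known), while *NUNITS_VECTYPE_OUT is the vector type for int, since the
   compared operands determine the number of units.  */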
10665 bool
10666 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10667 tree *stmt_vectype_out,
10668 tree *nunits_vectype_out)
10670 gimple *stmt = stmt_info->stmt;
10672 *stmt_vectype_out = NULL_TREE;
10673 *nunits_vectype_out = NULL_TREE;
10675 if (gimple_get_lhs (stmt) == NULL_TREE
10676 /* MASK_STORE has no lhs, but is ok. */
10677 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10679 if (is_a <gcall *> (stmt))
10681 /* Ignore calls with no lhs. These must be calls to
10682 #pragma omp simd functions, and what vectorization factor
10683 they really need can't be determined until
10684 vectorizable_simd_clone_call. */
10685 if (dump_enabled_p ())
10686 dump_printf_loc (MSG_NOTE, vect_location,
10687 "defer to SIMD clone analysis.\n");
10688 return true;
10691 if (dump_enabled_p ())
10693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10694 "not vectorized: irregular stmt.");
10695 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10697 return false;
10700 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10702 if (dump_enabled_p ())
10704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10705 "not vectorized: vector stmt in loop:");
10706 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10708 return false;
10711 tree vectype;
10712 tree scalar_type = NULL_TREE;
10713 if (STMT_VINFO_VECTYPE (stmt_info))
10714 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10715 else
10717 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10718 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10719 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10720 else
10721 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10723 /* Pure bool ops don't participate in number-of-units computation.
10724 For comparisons use the types being compared. */
10725 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10726 && is_gimple_assign (stmt)
10727 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10729 *stmt_vectype_out = boolean_type_node;
10731 tree rhs1 = gimple_assign_rhs1 (stmt);
10732 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10733 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10734 scalar_type = TREE_TYPE (rhs1);
10735 else
10737 if (dump_enabled_p ())
10738 dump_printf_loc (MSG_NOTE, vect_location,
10739 "pure bool operation.\n");
10740 return true;
10744 if (dump_enabled_p ())
10746 dump_printf_loc (MSG_NOTE, vect_location,
10747 "get vectype for scalar type: ");
10748 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10749 dump_printf (MSG_NOTE, "\n");
10751 vectype = get_vectype_for_scalar_type (scalar_type);
10752 if (!vectype)
10754 if (dump_enabled_p ())
10756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10757 "not vectorized: unsupported data-type ");
10758 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10759 scalar_type);
10760 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10762 return false;
10765 if (!*stmt_vectype_out)
10766 *stmt_vectype_out = vectype;
10768 if (dump_enabled_p ())
10770 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10771 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10772 dump_printf (MSG_NOTE, "\n");
10776 /* Don't try to compute scalar types if the stmt produces a boolean
10777 vector; use the existing vector type instead. */
10778 tree nunits_vectype;
10779 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10780 nunits_vectype = vectype;
10781 else
10783 /* The number of units is set according to the smallest scalar
10784 type (or the largest vector size, but we only support one
10785 vector size per vectorization). */
10786 if (*stmt_vectype_out != boolean_type_node)
10788 HOST_WIDE_INT dummy;
10789 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10791 if (dump_enabled_p ())
10793 dump_printf_loc (MSG_NOTE, vect_location,
10794 "get vectype for scalar type: ");
10795 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10796 dump_printf (MSG_NOTE, "\n");
10798 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10800 if (!nunits_vectype)
10802 if (dump_enabled_p ())
10804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10805 "not vectorized: unsupported data-type ");
10806 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10807 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10809 return false;
10812 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10813 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10815 if (dump_enabled_p ())
10817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10818 "not vectorized: different sized vector "
10819 "types in statement, ");
10820 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10821 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10822 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10823 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10825 return false;
10828 if (dump_enabled_p ())
10830 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10831 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10832 dump_printf (MSG_NOTE, "\n");
10834 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10835 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10836 dump_printf (MSG_NOTE, "\n");
10839 *nunits_vectype_out = nunits_vectype;
10840 return true;
10843 /* Try to determine the correct vector type for STMT_INFO, which is a
10844 statement that produces a scalar boolean result. Return the vector
10845 type on success, otherwise return NULL_TREE. */
10847 tree
10848 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10850 gimple *stmt = stmt_info->stmt;
10851 tree mask_type = NULL;
10852 tree vectype, scalar_type;
10854 if (is_gimple_assign (stmt)
10855 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10856 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10858 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10859 mask_type = get_mask_type_for_scalar_type (scalar_type);
10861 if (!mask_type)
10863 if (dump_enabled_p ())
10864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10865 "not vectorized: unsupported mask\n");
10866 return NULL_TREE;
10869 else
10871 tree rhs;
10872 ssa_op_iter iter;
10873 enum vect_def_type dt;
10875 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10877 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10879 if (dump_enabled_p ())
10881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10882 "not vectorized: can't compute mask type "
10883 "for statement, ");
10884 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10887 return NULL_TREE;
10890 /* No vectype probably means an external definition.
10891 Allow it in case there is another operand which
10892 allows us to determine the mask type. */
10893 if (!vectype)
10894 continue;
10896 if (!mask_type)
10897 mask_type = vectype;
10898 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10899 TYPE_VECTOR_SUBPARTS (vectype)))
10901 if (dump_enabled_p ())
10903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10904 "not vectorized: different sized masks "
10905 "types in statement, ");
10906 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10907 mask_type);
10908 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10909 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10910 vectype);
10911 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10913 return NULL_TREE;
10915 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10916 != VECTOR_BOOLEAN_TYPE_P (vectype))
10918 if (dump_enabled_p ())
10920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10921 "not vectorized: mixed mask and "
10922 "nonmask vector types in statement, ");
10923 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10924 mask_type);
10925 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10926 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10927 vectype);
10928 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10930 return NULL_TREE;
10934 /* We may compare a boolean value loaded as a vector of integers.
10935 Fix mask_type in such a case. */
10936 if (mask_type
10937 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10938 && gimple_code (stmt) == GIMPLE_ASSIGN
10939 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10940 mask_type = build_same_sized_truth_vector_type (mask_type);
10943 /* A missing mask_type should mean a loop-invariant predicate.
10944 This is probably a subject for optimization in if-conversion. */
10945 if (!mask_type && dump_enabled_p ())
10947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10948 "not vectorized: can't compute mask type "
10949 "for statement, ");
10950 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10952 return mask_type;