PR tree-optimization/85699
[official-gcc.git] / gcc / tree-vect-stmts.c
blob 1e8ccbce2af0d0c02ad2cd48a3ffabde4d0e9e7a
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100 if (body_cost_vec)
102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103 stmt_info_for_cost si = { count, kind,
104 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 misalign };
106 body_cost_vec->safe_push (si);
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 else
111 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 count, kind, stmt_info, misalign, where);
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
120 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 "vect_array");
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 tree array, unsigned HOST_WIDE_INT n)
133 tree vect_type, vect, vect_name, array_ref;
134 gimple *new_stmt;
136 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137 vect_type = TREE_TYPE (TREE_TYPE (array));
138 vect = vect_create_destination_var (scalar_dest, vect_type);
139 array_ref = build4 (ARRAY_REF, vect_type, array,
140 build_int_cst (size_type_node, n),
141 NULL_TREE, NULL_TREE);
143 new_stmt = gimple_build_assign (vect, array_ref);
144 vect_name = make_ssa_name (vect, new_stmt);
145 gimple_assign_set_lhs (new_stmt, vect_name);
146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
148 return vect_name;
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 tree array, unsigned HOST_WIDE_INT n)
159 tree array_ref;
160 gimple *new_stmt;
162 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
166 new_stmt = gimple_build_assign (array_ref, vect);
167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
172 (and its group). */
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
177 tree mem_ref;
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182 return mem_ref;
185 /* Add a clobber of variable VAR to the vectorization of STMT.
186 Emit the clobber before *GSI. */
188 static void
189 vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (stmt, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
204 enum vect_relevant relevant, bool live_p)
206 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
207 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
208 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 gimple *pattern_stmt;
211 if (dump_enabled_p ())
213 dump_printf_loc (MSG_NOTE, vect_location,
214 "mark relevant %d, live %d: ", relevant, live_p);
215 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
218 /* If this stmt is an original stmt in a pattern, we might need to mark its
219 related pattern stmt instead of the original stmt. However, such stmts
220 may have their own uses that are not in any pattern; in such cases the
221 stmt itself should be marked. */
222 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
224 /* This is the last stmt in a sequence that was detected as a
225 pattern that can potentially be vectorized. Don't mark the stmt
226 as relevant/live because it's not going to be vectorized.
227 Instead mark the pattern-stmt that replaces it. */
229 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
231 if (dump_enabled_p ())
232 dump_printf_loc (MSG_NOTE, vect_location,
233 "last stmt in pattern. don't mark"
234 " relevant/live.\n");
235 stmt_info = vinfo_for_stmt (pattern_stmt);
236 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
237 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
238 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
239 stmt = pattern_stmt;
242 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
243 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
244 STMT_VINFO_RELEVANT (stmt_info) = relevant;
246 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
247 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
249 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location,
251 "already marked relevant/live.\n");
252 return;
255 worklist->safe_push (stmt);
259 /* Function is_simple_and_all_uses_invariant
261 Return true if STMT is simple and all uses of it are invariant. */
263 bool
264 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
266 tree op;
267 gimple *def_stmt;
268 ssa_op_iter iter;
270 if (!is_gimple_assign (stmt))
271 return false;
273 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
275 enum vect_def_type dt = vect_uninitialized_def;
277 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
279 if (dump_enabled_p ())
280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
281 "use not simple.\n");
282 return false;
285 if (dt != vect_external_def && dt != vect_constant_def)
286 return false;
288 return true;
291 /* Function vect_stmt_relevant_p.
293 Return true if STMT in loop that is represented by LOOP_VINFO is
294 "relevant for vectorization".
296 A stmt is considered "relevant for vectorization" if:
297 - it has uses outside the loop.
298 - it has vdefs (it alters memory).
299 - it is a control stmt in the loop (except for the exit condition).
301 CHECKME: what other side effects would the vectorizer allow? */
303 static bool
304 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
305 enum vect_relevant *relevant, bool *live_p)
307 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
308 ssa_op_iter op_iter;
309 imm_use_iterator imm_iter;
310 use_operand_p use_p;
311 def_operand_p def_p;
313 *relevant = vect_unused_in_scope;
314 *live_p = false;
316 /* cond stmt other than loop exit cond. */
317 if (is_ctrl_stmt (stmt)
318 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
319 != loop_exit_ctrl_vec_info_type)
320 *relevant = vect_used_in_scope;
322 /* changing memory. */
323 if (gimple_code (stmt) != GIMPLE_PHI)
324 if (gimple_vdef (stmt)
325 && !gimple_clobber_p (stmt))
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_NOTE, vect_location,
329 "vec_stmt_relevant_p: stmt has vdefs.\n");
330 *relevant = vect_used_in_scope;
333 /* uses outside the loop. */
334 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
336 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
338 basic_block bb = gimple_bb (USE_STMT (use_p));
339 if (!flow_bb_inside_loop_p (loop, bb))
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE, vect_location,
343 "vec_stmt_relevant_p: used out of loop.\n");
345 if (is_gimple_debug (USE_STMT (use_p)))
346 continue;
348 /* We expect all such uses to be in the loop exit phis
349 (because of loop-closed SSA form). */
350 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
351 gcc_assert (bb == single_exit (loop)->dest);
353 *live_p = true;
358 if (*live_p && *relevant == vect_unused_in_scope
359 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
361 if (dump_enabled_p ())
362 dump_printf_loc (MSG_NOTE, vect_location,
363 "vec_stmt_relevant_p: stmt live but not relevant.\n");
364 *relevant = vect_used_only_live;
367 return (*live_p || *relevant);
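/* As a rough illustration of the checks above: a store such as
   "MEM[base_5] = x_3" has a vdef and is therefore marked
   vect_used_in_scope, while a computation whose only uses appear in
   loop-exit PHIs sets *LIVE_P and is additionally marked
   vect_used_only_live unless it is a simple assignment with
   all-invariant operands.  */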
371 /* Function exist_non_indexing_operands_for_use_p
373 USE is one of the uses attached to STMT. Check if USE is
374 used in STMT for anything other than indexing an array. */
376 static bool
377 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
379 tree operand;
380 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
382 /* USE corresponds to some operand in STMT. If there is no data
383 reference in STMT, then any operand that corresponds to USE
384 is not indexing an array. */
385 if (!STMT_VINFO_DATA_REF (stmt_info))
386 return true;
388 /* STMT has a data_ref. FORNOW this means that it's of one of
389 the following forms:
390 -1- ARRAY_REF = var
391 -2- var = ARRAY_REF
392 (This should have been verified in analyze_data_refs).
394 'var' in the second case corresponds to a def, not a use,
395 so USE cannot correspond to any operands that are not used
396 for array indexing.
398 Therefore, all we need to check is if STMT falls into the
399 first case, and whether var corresponds to USE. */
401 if (!gimple_assign_copy_p (stmt))
403 if (is_gimple_call (stmt)
404 && gimple_call_internal_p (stmt))
406 internal_fn ifn = gimple_call_internal_fn (stmt);
407 int mask_index = internal_fn_mask_index (ifn);
408 if (mask_index >= 0
409 && use == gimple_call_arg (stmt, mask_index))
410 return true;
411 int stored_value_index = internal_fn_stored_value_index (ifn);
412 if (stored_value_index >= 0
413 && use == gimple_call_arg (stmt, stored_value_index))
414 return true;
415 if (internal_gather_scatter_fn_p (ifn)
416 && use == gimple_call_arg (stmt, 1))
417 return true;
419 return false;
422 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
423 return false;
424 operand = gimple_assign_rhs1 (stmt);
425 if (TREE_CODE (operand) != SSA_NAME)
426 return false;
428 if (operand == use)
429 return true;
431 return false;
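/* For instance (a sketch, not a case taken from the callers above): in a
   copy statement "a[i_1] = x_2", the use x_2 is the stored value and so
   counts as a non-indexing operand, whereas the use i_1 only indexes the
   array and makes this function return false.  */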
436 /* Function process_use.
438 Inputs:
439 - a USE in STMT in a loop represented by LOOP_VINFO
440 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
441 that defined USE. This is done by calling mark_relevant and passing it
442 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
443 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
444 be performed.
446 Outputs:
447 Generally, LIVE_P and RELEVANT are used to define the liveness and
448 relevance info of the DEF_STMT of this USE:
449 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
450 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
451 Exceptions:
452 - case 1: If USE is used only for address computations (e.g. array indexing),
453 which does not need to be directly vectorized, then the liveness/relevance
454 of the respective DEF_STMT is left unchanged.
455 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
456 skip DEF_STMT because it has already been processed.
457 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
458 be modified accordingly.
460 Return true if everything is as expected. Return false otherwise. */
462 static bool
463 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
464 enum vect_relevant relevant, vec<gimple *> *worklist,
465 bool force)
467 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
468 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
469 stmt_vec_info dstmt_vinfo;
470 basic_block bb, def_bb;
471 gimple *def_stmt;
472 enum vect_def_type dt;
474 /* case 1: we are only interested in uses that need to be vectorized. Uses
475 that are used for address computation are not considered relevant. */
476 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
477 return true;
479 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
483 "not vectorized: unsupported use in stmt.\n");
484 return false;
487 if (!def_stmt || gimple_nop_p (def_stmt))
488 return true;
490 def_bb = gimple_bb (def_stmt);
491 if (!flow_bb_inside_loop_p (loop, def_bb))
493 if (dump_enabled_p ())
494 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
495 return true;
498 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
499 DEF_STMT must have already been processed, because this should be the
500 only way that STMT, which is a reduction-phi, was put in the worklist,
501 as there should be no other uses for DEF_STMT in the loop. So we just
502 check that everything is as expected, and we are done. */
503 dstmt_vinfo = vinfo_for_stmt (def_stmt);
504 bb = gimple_bb (stmt);
505 if (gimple_code (stmt) == GIMPLE_PHI
506 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
507 && gimple_code (def_stmt) != GIMPLE_PHI
508 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
509 && bb->loop_father == def_bb->loop_father)
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_NOTE, vect_location,
513 "reduc-stmt defining reduc-phi in the same nest.\n");
514 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
515 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
516 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
517 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
518 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
519 return true;
522 /* case 3a: outer-loop stmt defining an inner-loop stmt:
523 outer-loop-header-bb:
524 d = def_stmt
525 inner-loop:
526 stmt # use (d)
527 outer-loop-tail-bb:
528 ... */
529 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
531 if (dump_enabled_p ())
532 dump_printf_loc (MSG_NOTE, vect_location,
533 "outer-loop def-stmt defining inner-loop stmt.\n");
535 switch (relevant)
537 case vect_unused_in_scope:
538 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
539 vect_used_in_scope : vect_unused_in_scope;
540 break;
542 case vect_used_in_outer_by_reduction:
543 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
544 relevant = vect_used_by_reduction;
545 break;
547 case vect_used_in_outer:
548 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
549 relevant = vect_used_in_scope;
550 break;
552 case vect_used_in_scope:
553 break;
555 default:
556 gcc_unreachable ();
560 /* case 3b: inner-loop stmt defining an outer-loop stmt:
561 outer-loop-header-bb:
563 inner-loop:
564 d = def_stmt
565 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
566 stmt # use (d) */
567 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
569 if (dump_enabled_p ())
570 dump_printf_loc (MSG_NOTE, vect_location,
571 "inner-loop def-stmt defining outer-loop stmt.\n");
573 switch (relevant)
575 case vect_unused_in_scope:
576 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
577 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
578 vect_used_in_outer_by_reduction : vect_unused_in_scope;
579 break;
581 case vect_used_by_reduction:
582 case vect_used_only_live:
583 relevant = vect_used_in_outer_by_reduction;
584 break;
586 case vect_used_in_scope:
587 relevant = vect_used_in_outer;
588 break;
590 default:
591 gcc_unreachable ();
594 /* We are also not interested in uses on loop PHI backedges that are
595 inductions. Otherwise we'll needlessly vectorize the IV increment
596 and cause hybrid SLP for SLP inductions. Unless the PHI is live
597 of course. */
598 else if (gimple_code (stmt) == GIMPLE_PHI
599 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
600 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
601 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
602 == use))
604 if (dump_enabled_p ())
605 dump_printf_loc (MSG_NOTE, vect_location,
606 "induction value on backedge.\n");
607 return true;
611 vect_mark_relevant (worklist, def_stmt, relevant, false);
612 return true;
616 /* Function vect_mark_stmts_to_be_vectorized.
618 Not all stmts in the loop need to be vectorized. For example:
620 for i...
621 for j...
622 1. T0 = i + j
623 2. T1 = a[T0]
625 3. j = j + 1
627 Stmts 1 and 3 do not need to be vectorized, because loop control and
628 addressing of vectorized data-refs are handled differently.
630 This pass detects such stmts. */
632 bool
633 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
635 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
636 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
637 unsigned int nbbs = loop->num_nodes;
638 gimple_stmt_iterator si;
639 gimple *stmt;
640 unsigned int i;
641 stmt_vec_info stmt_vinfo;
642 basic_block bb;
643 gimple *phi;
644 bool live_p;
645 enum vect_relevant relevant;
647 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE, vect_location,
649 "=== vect_mark_stmts_to_be_vectorized ===\n");
651 auto_vec<gimple *, 64> worklist;
653 /* 1. Init worklist. */
654 for (i = 0; i < nbbs; i++)
656 bb = bbs[i];
657 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
659 phi = gsi_stmt (si);
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
666 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
667 vect_mark_relevant (&worklist, phi, relevant, live_p);
669 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
671 stmt = gsi_stmt (si);
672 if (dump_enabled_p ())
674 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
675 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
678 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
679 vect_mark_relevant (&worklist, stmt, relevant, live_p);
683 /* 2. Process_worklist */
684 while (worklist.length () > 0)
686 use_operand_p use_p;
687 ssa_op_iter iter;
689 stmt = worklist.pop ();
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
693 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
696 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
697 (DEF_STMT) as relevant/irrelevant according to the relevance property
698 of STMT. */
699 stmt_vinfo = vinfo_for_stmt (stmt);
700 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
702 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
703 propagated as is to the DEF_STMTs of its USEs.
705 One exception is when STMT has been identified as defining a reduction
706 variable; in this case we set the relevance to vect_used_by_reduction.
707 This is because we distinguish between two kinds of relevant stmts -
708 those that are used by a reduction computation, and those that are
709 (also) used by a regular computation. This allows us later on to
710 identify stmts that are used solely by a reduction, and therefore the
711 order of the results that they produce does not have to be kept. */
713 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
715 case vect_reduction_def:
716 gcc_assert (relevant != vect_unused_in_scope);
717 if (relevant != vect_unused_in_scope
718 && relevant != vect_used_in_scope
719 && relevant != vect_used_by_reduction
720 && relevant != vect_used_only_live)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of reduction.\n");
725 return false;
727 break;
729 case vect_nested_cycle:
730 if (relevant != vect_unused_in_scope
731 && relevant != vect_used_in_outer_by_reduction
732 && relevant != vect_used_in_outer)
734 if (dump_enabled_p ())
735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
736 "unsupported use of nested cycle.\n");
738 return false;
740 break;
742 case vect_double_reduction_def:
743 if (relevant != vect_unused_in_scope
744 && relevant != vect_used_by_reduction
745 && relevant != vect_used_only_live)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
749 "unsupported use of double reduction.\n");
751 return false;
753 break;
755 default:
756 break;
759 if (is_pattern_stmt_p (stmt_vinfo))
761 /* Pattern statements are not inserted into the code, so
762 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
763 have to scan the RHS or function arguments instead. */
764 if (is_gimple_assign (stmt))
766 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
767 tree op = gimple_assign_rhs1 (stmt);
769 i = 1;
770 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
772 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
773 relevant, &worklist, false)
774 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
775 relevant, &worklist, false))
776 return false;
777 i = 2;
779 for (; i < gimple_num_ops (stmt); i++)
781 op = gimple_op (stmt, i);
782 if (TREE_CODE (op) == SSA_NAME
783 && !process_use (stmt, op, loop_vinfo, relevant,
784 &worklist, false))
785 return false;
788 else if (is_gimple_call (stmt))
790 for (i = 0; i < gimple_call_num_args (stmt); i++)
792 tree arg = gimple_call_arg (stmt, i);
793 if (!process_use (stmt, arg, loop_vinfo, relevant,
794 &worklist, false))
795 return false;
799 else
800 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
802 tree op = USE_FROM_PTR (use_p);
803 if (!process_use (stmt, op, loop_vinfo, relevant,
804 &worklist, false))
805 return false;
808 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
810 gather_scatter_info gs_info;
811 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
812 gcc_unreachable ();
813 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
814 &worklist, true))
815 return false;
817 } /* while worklist */
819 return true;
823 /* Function vect_model_simple_cost.
825 Models cost for simple operations, i.e. those that only emit ncopies of a
826 single op. Right now, this does not account for multiple insns that could
827 be generated for the single vector op. We will handle that shortly. */
829 void
830 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
831 enum vect_def_type *dt,
832 int ndts,
833 stmt_vector_for_cost *prologue_cost_vec,
834 stmt_vector_for_cost *body_cost_vec)
836 int i;
837 int inside_cost = 0, prologue_cost = 0;
839 /* The SLP costs were already calculated during SLP tree build. */
840 gcc_assert (!PURE_SLP_STMT (stmt_info));
842 /* Cost the "broadcast" of a scalar operand into a vector operand.
843 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
844 cost model. */
845 for (i = 0; i < ndts; i++)
846 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
847 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
848 stmt_info, 0, vect_prologue);
850 /* Pass the inside-of-loop statements to the target-specific cost model. */
851 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
852 stmt_info, 0, vect_body);
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_NOTE, vect_location,
856 "vect_model_simple_cost: inside_cost = %d, "
857 "prologue_cost = %d .\n", inside_cost, prologue_cost);
861 /* Model cost for type demotion and promotion operations. PWR is normally
862 zero for single-step promotions and demotions. It will be one if
863 two-step promotion/demotion is required, and so on. Each additional
864 step doubles the number of instructions required. */
866 static void
867 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
868 enum vect_def_type *dt, int pwr)
870 int i, tmp;
871 int inside_cost = 0, prologue_cost = 0;
872 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
873 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
874 void *target_cost_data;
876 /* The SLP costs were already calculated during SLP tree build. */
877 gcc_assert (!PURE_SLP_STMT (stmt_info));
879 if (loop_vinfo)
880 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
881 else
882 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
884 for (i = 0; i < pwr + 1; i++)
886 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
887 (i + 1) : i;
888 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
889 vec_promote_demote, stmt_info, 0,
890 vect_body);
893 /* FORNOW: Assuming maximum 2 args per stmt. */
894 for (i = 0; i < 2; i++)
895 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
896 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
897 stmt_info, 0, vect_prologue);
899 if (dump_enabled_p ())
900 dump_printf_loc (MSG_NOTE, vect_location,
901 "vect_model_promotion_demotion_cost: inside_cost = %d, "
902 "prologue_cost = %d .\n", inside_cost, prologue_cost);
905 /* Function vect_model_store_cost
907 Models cost for stores. In the case of grouped accesses, one access
908 has the overhead of the grouped access attributed to it. */
910 void
911 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
912 vect_memory_access_type memory_access_type,
913 vec_load_store_type vls_type, slp_tree slp_node,
914 stmt_vector_for_cost *prologue_cost_vec,
915 stmt_vector_for_cost *body_cost_vec)
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
919 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
920 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
922 if (vls_type == VLS_STORE_INVARIANT)
923 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
924 stmt_info, 0, vect_prologue);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
930 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
931 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
934 /* True if we should include any once-per-group costs as well as
935 the cost of the statement itself. For SLP we only get called
936 once per group anyhow. */
937 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
939 /* We assume that the cost of a single store-lanes instruction is
940 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
941 access is instead being provided by a permute-and-store operation,
942 include the cost of the permutes. */
943 if (first_stmt_p
944 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
946 /* Uses high and low interleave or shuffle operations for each
947 needed permute. */
948 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
949 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
950 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
951 stmt_info, 0, vect_body);
953 if (dump_enabled_p ())
954 dump_printf_loc (MSG_NOTE, vect_location,
955 "vect_model_store_cost: strided group_size = %d .\n",
956 group_size);
959 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
960 /* Costs of the stores. */
961 if (memory_access_type == VMAT_ELEMENTWISE
962 || memory_access_type == VMAT_GATHER_SCATTER)
964 /* N scalar stores plus extracting the elements. */
965 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
966 inside_cost += record_stmt_cost (body_cost_vec,
967 ncopies * assumed_nunits,
968 scalar_store, stmt_info, 0, vect_body);
970 else
971 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
973 if (memory_access_type == VMAT_ELEMENTWISE
974 || memory_access_type == VMAT_STRIDED_SLP)
976 /* N scalar stores plus extracting the elements. */
977 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
978 inside_cost += record_stmt_cost (body_cost_vec,
979 ncopies * assumed_nunits,
980 vec_to_scalar, stmt_info, 0, vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: inside_cost = %d, "
986 "prologue_cost = %d .\n", inside_cost, prologue_cost);
990 /* Calculate cost of DR's memory access. */
991 void
992 vect_get_store_cost (struct data_reference *dr, int ncopies,
993 unsigned int *inside_cost,
994 stmt_vector_for_cost *body_cost_vec)
996 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
997 gimple *stmt = DR_STMT (dr);
998 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1000 switch (alignment_support_scheme)
1002 case dr_aligned:
1004 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1005 vector_store, stmt_info, 0,
1006 vect_body);
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE, vect_location,
1010 "vect_model_store_cost: aligned.\n");
1011 break;
1014 case dr_unaligned_supported:
1016 /* Here, we assign an additional cost for the unaligned store. */
1017 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1018 unaligned_store, stmt_info,
1019 DR_MISALIGNMENT (dr), vect_body);
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_NOTE, vect_location,
1022 "vect_model_store_cost: unaligned supported by "
1023 "hardware.\n");
1024 break;
1027 case dr_unaligned_unsupported:
1029 *inside_cost = VECT_MAX_COST;
1031 if (dump_enabled_p ())
1032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1033 "vect_model_store_cost: unsupported access.\n");
1034 break;
1037 default:
1038 gcc_unreachable ();
1043 /* Function vect_model_load_cost
1045 Models cost for loads. In the case of grouped accesses, one access has
1046 the overhead of the grouped access attributed to it. Since unaligned
1047 accesses are supported for loads, we also account for the costs of the
1048 access scheme chosen. */
1050 void
1051 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1052 vect_memory_access_type memory_access_type,
1053 slp_tree slp_node,
1054 stmt_vector_for_cost *prologue_cost_vec,
1055 stmt_vector_for_cost *body_cost_vec)
1057 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1058 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1059 unsigned int inside_cost = 0, prologue_cost = 0;
1060 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1062 /* Grouped loads read all elements in the group at once,
1063 so we want the DR for the first statement. */
1064 if (!slp_node && grouped_access_p)
1066 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1067 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1070 /* True if we should include any once-per-group costs as well as
1071 the cost of the statement itself. For SLP we only get called
1072 once per group anyhow. */
1073 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1075 /* We assume that the cost of a single load-lanes instruction is
1076 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1077 access is instead being provided by a load-and-permute operation,
1078 include the cost of the permutes. */
1079 if (first_stmt_p
1080 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1082 /* Uses even and odd extract operations or shuffle operations
1083 for each needed permute. */
1084 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1085 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1086 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1087 stmt_info, 0, vect_body);
1089 if (dump_enabled_p ())
1090 dump_printf_loc (MSG_NOTE, vect_location,
1091 "vect_model_load_cost: strided group_size = %d .\n",
1092 group_size);
1095 /* The loads themselves. */
1096 if (memory_access_type == VMAT_ELEMENTWISE
1097 || memory_access_type == VMAT_GATHER_SCATTER)
1099 /* N scalar loads plus gathering them into a vector. */
1100 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1101 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1102 inside_cost += record_stmt_cost (body_cost_vec,
1103 ncopies * assumed_nunits,
1104 scalar_load, stmt_info, 0, vect_body);
1106 else
1107 vect_get_load_cost (dr, ncopies, first_stmt_p,
1108 &inside_cost, &prologue_cost,
1109 prologue_cost_vec, body_cost_vec, true);
1110 if (memory_access_type == VMAT_ELEMENTWISE
1111 || memory_access_type == VMAT_STRIDED_SLP)
1112 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1113 stmt_info, 0, vect_body);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE, vect_location,
1117 "vect_model_load_cost: inside_cost = %d, "
1118 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1122 /* Calculate cost of DR's memory access. */
1123 void
1124 vect_get_load_cost (struct data_reference *dr, int ncopies,
1125 bool add_realign_cost, unsigned int *inside_cost,
1126 unsigned int *prologue_cost,
1127 stmt_vector_for_cost *prologue_cost_vec,
1128 stmt_vector_for_cost *body_cost_vec,
1129 bool record_prologue_costs)
1131 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1132 gimple *stmt = DR_STMT (dr);
1133 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1135 switch (alignment_support_scheme)
1137 case dr_aligned:
1139 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1140 stmt_info, 0, vect_body);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "vect_model_load_cost: aligned.\n");
1146 break;
1148 case dr_unaligned_supported:
1150 /* Here, we assign an additional cost for the unaligned load. */
1151 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1152 unaligned_load, stmt_info,
1153 DR_MISALIGNMENT (dr), vect_body);
1155 if (dump_enabled_p ())
1156 dump_printf_loc (MSG_NOTE, vect_location,
1157 "vect_model_load_cost: unaligned supported by "
1158 "hardware.\n");
1160 break;
1162 case dr_explicit_realign:
1164 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1165 vector_load, stmt_info, 0, vect_body);
1166 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1167 vec_perm, stmt_info, 0, vect_body);
1169 /* FIXME: If the misalignment remains fixed across the iterations of
1170 the containing loop, the following cost should be added to the
1171 prologue costs. */
1172 if (targetm.vectorize.builtin_mask_for_load)
1173 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1174 stmt_info, 0, vect_body);
1176 if (dump_enabled_p ())
1177 dump_printf_loc (MSG_NOTE, vect_location,
1178 "vect_model_load_cost: explicit realign\n");
1180 break;
1182 case dr_explicit_realign_optimized:
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_NOTE, vect_location,
1186 "vect_model_load_cost: unaligned software "
1187 "pipelined.\n");
1189 /* An unaligned software pipeline has a load of an address, an initial
1190 load, and possibly a mask operation to "prime" the loop. However,
1191 if this is an access in a group of loads, which provide grouped
1192 access, then the above cost should only be considered for one
1193 access in the group. Inside the loop, there is a load op
1194 and a realignment op. */
1196 if (add_realign_cost && record_prologue_costs)
1198 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1199 vector_stmt, stmt_info,
1200 0, vect_prologue);
1201 if (targetm.vectorize.builtin_mask_for_load)
1202 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1203 vector_stmt, stmt_info,
1204 0, vect_prologue);
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1208 stmt_info, 0, vect_body);
1209 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1210 stmt_info, 0, vect_body);
1212 if (dump_enabled_p ())
1213 dump_printf_loc (MSG_NOTE, vect_location,
1214 "vect_model_load_cost: explicit realign optimized"
1215 "\n");
1217 break;
1220 case dr_unaligned_unsupported:
1222 *inside_cost = VECT_MAX_COST;
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1226 "vect_model_load_cost: unsupported access.\n");
1227 break;
1230 default:
1231 gcc_unreachable ();
1235 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1236 the loop preheader for the vectorized stmt STMT. */
1238 static void
1239 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1241 if (gsi)
1242 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1243 else
1245 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1248 if (loop_vinfo)
1250 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1251 basic_block new_bb;
1252 edge pe;
1254 if (nested_in_vect_loop_p (loop, stmt))
1255 loop = loop->inner;
1257 pe = loop_preheader_edge (loop);
1258 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1259 gcc_assert (!new_bb);
1261 else
1263 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1264 basic_block bb;
1265 gimple_stmt_iterator gsi_bb_start;
1267 gcc_assert (bb_vinfo);
1268 bb = BB_VINFO_BB (bb_vinfo);
1269 gsi_bb_start = gsi_after_labels (bb);
1270 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1274 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "created new init_stmt: ");
1278 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1282 /* Function vect_init_vector.
1284 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1285 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1286 a vector type, a vector with all elements equal to VAL is created first.
1287 Place the initialization at BSI if it is not NULL. Otherwise, place the
1288 initialization at the loop preheader.
1289 Return the DEF of INIT_STMT.
1290 It will be used in the vectorization of STMT. */
1292 tree
1293 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1295 gimple *init_stmt;
1296 tree new_temp;
1298 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1299 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1301 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1302 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1304 /* A scalar boolean value should be transformed into an all-zeros
1305 or all-ones value before building a vector. */
1306 if (VECTOR_BOOLEAN_TYPE_P (type))
1308 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1309 tree false_val = build_zero_cst (TREE_TYPE (type));
1311 if (CONSTANT_CLASS_P (val))
1312 val = integer_zerop (val) ? false_val : true_val;
1313 else
1315 new_temp = make_ssa_name (TREE_TYPE (type));
1316 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1317 val, true_val, false_val);
1318 vect_init_vector_1 (stmt, init_stmt, gsi);
1319 val = new_temp;
1322 else if (CONSTANT_CLASS_P (val))
1323 val = fold_convert (TREE_TYPE (type), val);
1324 else
1326 new_temp = make_ssa_name (TREE_TYPE (type));
1327 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1328 init_stmt = gimple_build_assign (new_temp,
1329 fold_build1 (VIEW_CONVERT_EXPR,
1330 TREE_TYPE (type),
1331 val));
1332 else
1333 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1334 vect_init_vector_1 (stmt, init_stmt, gsi);
1335 val = new_temp;
1338 val = build_vector_from_val (type, val);
1341 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1342 init_stmt = gimple_build_assign (new_temp, val);
1343 vect_init_vector_1 (stmt, init_stmt, gsi);
1344 return new_temp;
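/* A hypothetical use of the function above: calling vect_init_vector
   with a scalar integer constant 0, a vector TYPE and a null GSI
   converts the constant, builds the vector constant { 0, ... } via
   build_vector_from_val, emits the init statement on the loop
   preheader edge, and returns the fresh "cst_" SSA name that holds
   it.  */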
1347 /* Function vect_get_vec_def_for_operand_1.
1349 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1350 DT that will be used in the vectorized stmt. */
1352 tree
1353 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1355 tree vec_oprnd;
1356 gimple *vec_stmt;
1357 stmt_vec_info def_stmt_info = NULL;
1359 switch (dt)
1361 /* operand is a constant or a loop invariant. */
1362 case vect_constant_def:
1363 case vect_external_def:
1364 /* Code should use vect_get_vec_def_for_operand. */
1365 gcc_unreachable ();
1367 /* operand is defined inside the loop. */
1368 case vect_internal_def:
1370 /* Get the def from the vectorized stmt. */
1371 def_stmt_info = vinfo_for_stmt (def_stmt);
1373 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1374 /* Get vectorized pattern statement. */
1375 if (!vec_stmt
1376 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1377 && !STMT_VINFO_RELEVANT (def_stmt_info))
1378 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1379 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1380 gcc_assert (vec_stmt);
1381 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1382 vec_oprnd = PHI_RESULT (vec_stmt);
1383 else if (is_gimple_call (vec_stmt))
1384 vec_oprnd = gimple_call_lhs (vec_stmt);
1385 else
1386 vec_oprnd = gimple_assign_lhs (vec_stmt);
1387 return vec_oprnd;
1390 /* operand is defined by a loop header phi. */
1391 case vect_reduction_def:
1392 case vect_double_reduction_def:
1393 case vect_nested_cycle:
1394 case vect_induction_def:
1396 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1398 /* Get the def from the vectorized stmt. */
1399 def_stmt_info = vinfo_for_stmt (def_stmt);
1400 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1401 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1402 vec_oprnd = PHI_RESULT (vec_stmt);
1403 else
1404 vec_oprnd = gimple_get_lhs (vec_stmt);
1405 return vec_oprnd;
1408 default:
1409 gcc_unreachable ();
1414 /* Function vect_get_vec_def_for_operand.
1416 OP is an operand in STMT. This function returns a (vector) def that will be
1417 used in the vectorized stmt for STMT.
1419 In the case that OP is an SSA_NAME which is defined in the loop, then
1420 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1422 In case OP is an invariant or constant, a new stmt that creates a vector def
1423 needs to be introduced. VECTYPE may be used to specify a required type for
1424 the vector invariant. */
1426 tree
1427 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1429 gimple *def_stmt;
1430 enum vect_def_type dt;
1431 bool is_simple_use;
1432 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1433 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1435 if (dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE, vect_location,
1438 "vect_get_vec_def_for_operand: ");
1439 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1440 dump_printf (MSG_NOTE, "\n");
1443 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1444 gcc_assert (is_simple_use);
1445 if (def_stmt && dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1448 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1451 if (dt == vect_constant_def || dt == vect_external_def)
1453 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1454 tree vector_type;
1456 if (vectype)
1457 vector_type = vectype;
1458 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1459 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1460 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1461 else
1462 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1464 gcc_assert (vector_type);
1465 return vect_init_vector (stmt, op, vector_type, NULL);
1467 else
1468 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1472 /* Function vect_get_vec_def_for_stmt_copy
1474 Return a vector-def for an operand. This function is used when the
1475 vectorized stmt to be created (by the caller to this function) is a "copy"
1476 created in case the vectorized result cannot fit in one vector, and several
1477 copies of the vector-stmt are required. In this case the vector-def is
1478 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1479 of the stmt that defines VEC_OPRND.
1480 DT is the type of the vector def VEC_OPRND.
1482 Context:
1483 In case the vectorization factor (VF) is bigger than the number
1484 of elements that can fit in a vectype (nunits), we have to generate
1485 more than one vector stmt to vectorize the scalar stmt. This situation
1486 arises when there are multiple data-types operated upon in the loop; the
1487 smallest data-type determines the VF, and as a result, when vectorizing
1488 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1489 vector stmt (each computing a vector of 'nunits' results, and together
1490 computing 'VF' results in each iteration). This function is called when
1491 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1492 which VF=16 and nunits=4, so the number of copies required is 4):
1494 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1496 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1497 VS1.1: vx.1 = memref1 VS1.2
1498 VS1.2: vx.2 = memref2 VS1.3
1499 VS1.3: vx.3 = memref3
1501 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1502 VSnew.1: vz1 = vx.1 + ... VSnew.2
1503 VSnew.2: vz2 = vx.2 + ... VSnew.3
1504 VSnew.3: vz3 = vx.3 + ...
1506 The vectorization of S1 is explained in vectorizable_load.
1507 The vectorization of S2:
1508 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1509 the function 'vect_get_vec_def_for_operand' is called to
1510 get the relevant vector-def for each operand of S2. For operand x it
1511 returns the vector-def 'vx.0'.
1513 To create the remaining copies of the vector-stmt (VSnew.j), this
1514 function is called to get the relevant vector-def for each operand. It is
1515 obtained from the respective VS1.j stmt, which is recorded in the
1516 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1518 For example, to obtain the vector-def 'vx.1' in order to create the
1519 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1520 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1521 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1522 and return its def ('vx.1').
1523 Overall, to create the above sequence this function will be called 3 times:
1524 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1525 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1526 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1528 tree
1529 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1531 gimple *vec_stmt_for_operand;
1532 stmt_vec_info def_stmt_info;
1534 /* Do nothing; can reuse same def. */
1535 if (dt == vect_external_def || dt == vect_constant_def )
1536 return vec_oprnd;
1538 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1539 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1540 gcc_assert (def_stmt_info);
1541 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1542 gcc_assert (vec_stmt_for_operand);
1543 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1544 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1545 else
1546 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1547 return vec_oprnd;
1551 /* Get vectorized definitions for the operands to create a copy of an original
1552 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1554 void
1555 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1556 vec<tree> *vec_oprnds0,
1557 vec<tree> *vec_oprnds1)
1559 tree vec_oprnd = vec_oprnds0->pop ();
1561 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1562 vec_oprnds0->quick_push (vec_oprnd);
1564 if (vec_oprnds1 && vec_oprnds1->length ())
1566 vec_oprnd = vec_oprnds1->pop ();
1567 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1568 vec_oprnds1->quick_push (vec_oprnd);
1573 /* Get vectorized definitions for OP0 and OP1. */
1575 void
1576 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1577 vec<tree> *vec_oprnds0,
1578 vec<tree> *vec_oprnds1,
1579 slp_tree slp_node)
1581 if (slp_node)
1583 int nops = (op1 == NULL_TREE) ? 1 : 2;
1584 auto_vec<tree> ops (nops);
1585 auto_vec<vec<tree> > vec_defs (nops);
1587 ops.quick_push (op0);
1588 if (op1)
1589 ops.quick_push (op1);
1591 vect_get_slp_defs (ops, slp_node, &vec_defs);
1593 *vec_oprnds0 = vec_defs[0];
1594 if (op1)
1595 *vec_oprnds1 = vec_defs[1];
1597 else
1599 tree vec_oprnd;
1601 vec_oprnds0->create (1);
1602 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1603 vec_oprnds0->quick_push (vec_oprnd);
1605 if (op1)
1607 vec_oprnds1->create (1);
1608 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1609 vec_oprnds1->quick_push (vec_oprnd);
1614 /* Helper function called by vect_finish_replace_stmt and
1615 vect_finish_stmt_generation. Set the location of the new
1616 statement and create a stmt_vec_info for it. */
1618 static void
1619 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1622 vec_info *vinfo = stmt_info->vinfo;
1624 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1626 if (dump_enabled_p ())
1628 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1632 gimple_set_location (vec_stmt, gimple_location (stmt));
1634 /* While EH edges will generally prevent vectorization, stmt might
1635 e.g. be in a must-not-throw region. Ensure newly created stmts
1636 that could throw are part of the same region. */
1637 int lp_nr = lookup_stmt_eh_lp (stmt);
1638 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1639 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1642 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1643 which sets the same scalar result as STMT did. */
1645 void
1646 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1648 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1650 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1651 gsi_replace (&gsi, vec_stmt, false);
1653 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1656 /* Function vect_finish_stmt_generation.
1658 Insert a new stmt. */
1660 void
1661 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1662 gimple_stmt_iterator *gsi)
1664 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1666 if (!gsi_end_p (*gsi)
1667 && gimple_has_mem_ops (vec_stmt))
1669 gimple *at_stmt = gsi_stmt (*gsi);
1670 tree vuse = gimple_vuse (at_stmt);
1671 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1673 tree vdef = gimple_vdef (at_stmt);
1674 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1675 /* If we have an SSA vuse and insert a store, update virtual
1676 SSA form to avoid triggering the renamer. Do so only
1677 if we can easily see all uses - which is what almost always
1678 happens with the way vectorized stmts are inserted. */
1679 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1680 && ((is_gimple_assign (vec_stmt)
1681 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1682 || (is_gimple_call (vec_stmt)
1683 && !(gimple_call_flags (vec_stmt)
1684 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1686 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1687 gimple_set_vdef (vec_stmt, new_vdef);
1688 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1692 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1693 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1696 /* We want to vectorize a call to combined function CFN with function
1697 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1698 as the types of all inputs. Check whether this is possible using
1699 an internal function, returning its code if so or IFN_LAST if not. */
1701 static internal_fn
1702 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1703 tree vectype_out, tree vectype_in)
1705 internal_fn ifn;
1706 if (internal_fn_p (cfn))
1707 ifn = as_internal_fn (cfn);
1708 else
1709 ifn = associated_internal_fn (fndecl);
1710 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1712 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1713 if (info.vectorizable)
1715 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1716 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1717 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1718 OPTIMIZE_FOR_SPEED))
1719 return ifn;
1722 return IFN_LAST;
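/* For example (depending on the target's capabilities): a call to the
   combined function CFN_SQRT maps to the direct internal function
   IFN_SQRT, which is returned here only if
   direct_internal_fn_supported_p reports that the target provides the
   corresponding optab for the given vector types; otherwise IFN_LAST
   is returned.  */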
1726 static tree permute_vec_elements (tree, tree, tree, gimple *,
1727 gimple_stmt_iterator *);
1729 /* Check whether a load or store statement in the loop described by
1730 LOOP_VINFO is possible in a fully-masked loop. This is testing
1731 whether the vectorizer pass has the appropriate support, as well as
1732 whether the target does.
1734 VLS_TYPE says whether the statement is a load or store and VECTYPE
1735 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1736 says how the load or store is going to be implemented and GROUP_SIZE
1737 is the number of load or store statements in the containing group.
1738 If the access is a gather load or scatter store, GS_INFO describes
1739 its arguments.
1741 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1742 supported, otherwise record the required mask types. */
1744 static void
1745 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1746 vec_load_store_type vls_type, int group_size,
1747 vect_memory_access_type memory_access_type,
1748 gather_scatter_info *gs_info)
1750 /* Invariant loads need no special support. */
1751 if (memory_access_type == VMAT_INVARIANT)
1752 return;
1754 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1755 machine_mode vecmode = TYPE_MODE (vectype);
1756 bool is_load = (vls_type == VLS_LOAD);
1757 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1759 if (is_load
1760 ? !vect_load_lanes_supported (vectype, group_size, true)
1761 : !vect_store_lanes_supported (vectype, group_size, true))
1763 if (dump_enabled_p ())
1764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1765 "can't use a fully-masked loop because the"
1766 " target doesn't have an appropriate masked"
1767 " load/store-lanes instruction.\n");
1768 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1769 return;
1771 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1772 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1773 return;
1776 if (memory_access_type == VMAT_GATHER_SCATTER)
1778 internal_fn ifn = (is_load
1779 ? IFN_MASK_GATHER_LOAD
1780 : IFN_MASK_SCATTER_STORE);
1781 tree offset_type = TREE_TYPE (gs_info->offset);
1782 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1783 gs_info->memory_type,
1784 TYPE_SIGN (offset_type),
1785 gs_info->scale))
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't use a fully-masked loop because the"
1790 " target doesn't have an appropriate masked"
1791 " gather load or scatter store instruction.\n");
1792 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1793 return;
1795 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1796 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1797 return;
1800 if (memory_access_type != VMAT_CONTIGUOUS
1801 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1803 /* Element X of the data must come from iteration i * VF + X of the
1804 scalar loop. We need more work to support other mappings. */
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "can't use a fully-masked loop because an access"
1808 " isn't contiguous.\n");
1809 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1810 return;
1813 machine_mode mask_mode;
1814 if (!(targetm.vectorize.get_mask_mode
1815 (GET_MODE_NUNITS (vecmode),
1816 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1817 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1821 "can't use a fully-masked loop because the target"
1822 " doesn't have the appropriate masked load or"
1823 " store.\n");
1824 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1825 return;
1827 /* We might load more scalars than we need for permuting SLP loads.
1828 We checked in get_group_load_store_type that the extra elements
1829 don't leak into a new vector. */
1830 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1831 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1832 unsigned int nvectors;
1833 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1834 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1835 else
1836 gcc_unreachable ();
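/* Illustrative sketch, not part of this file: for contiguous accesses the
   number of masks recorded above is the number of vectors needed to cover
   GROUP_SIZE * VF scalar elements, i.e. the division rounded away from
   zero.  A minimal plain-C model for positive constant operands; the
   helper name is made up for illustration.  */
static unsigned int
nvectors_for_group (unsigned int group_size, unsigned int vf,
                    unsigned int nunits)
{
  /* Plain-C equivalent of can_div_away_from_zero_p for positive
     constants: divide and round up.  E.g. group_size 3, vf 4,
     nunits 8 gives (12 + 7) / 8 = 2 masks per copy.  */
  return (group_size * vf + nunits - 1) / nunits;
}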
1839 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1844 MASK_TYPE is the type of both masks. If new statements are needed,
1845 insert them before GSI. */
1847 static tree
1848 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1849 gimple_stmt_iterator *gsi)
1851 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1852 if (!loop_mask)
1853 return vec_mask;
1855 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1856 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1857 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1858 vec_mask, loop_mask);
1859 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1860 return and_res;
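/* Illustrative sketch, not part of this file: in a fully-masked loop the
   mask actually applied to a load or store is the AND of the loop mask
   (is this lane still inside the remaining trip count?) and the vectorized
   scalar condition.  A minimal plain-C model of a masked store; the helper
   name is made up for illustration.  */
#include <stdbool.h>
#include <stddef.h>

static void
masked_store_model (int *dst, const int *src, const bool *cond,
                    size_t start, size_t n, size_t vf)
{
  for (size_t lane = 0; lane < vf; ++lane)
    {
      if (start + lane >= n)
        continue;				/* loop mask is false for this lane  */
      if (cond[start + lane])			/* VEC_MASK; combined via BIT_AND_EXPR  */
        dst[start + lane] = src[start + lane];
    }
}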
1863 /* Determine whether we can use a gather load or scatter store to vectorize
1864 strided load or store STMT by truncating the current offset to a smaller
1865 width. We need to be able to construct an offset vector:
1867 { 0, X, X*2, X*3, ... }
1869 without loss of precision, where X is STMT's DR_STEP.
1871 Return true if this is possible, describing the gather load or scatter
1872 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1874 static bool
1875 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1876 bool masked_p,
1877 gather_scatter_info *gs_info)
1879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1880 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1881 tree step = DR_STEP (dr);
1882 if (TREE_CODE (step) != INTEGER_CST)
1884 /* ??? Perhaps we could use range information here? */
1885 if (dump_enabled_p ())
1886 dump_printf_loc (MSG_NOTE, vect_location,
1887 "cannot truncate variable step.\n");
1888 return false;
1891 /* Get the number of bits in an element. */
1892 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1893 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1894 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1896 /* Set COUNT to the upper limit on the number of elements - 1.
1897 Start with the maximum vectorization factor. */
1898 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1900 /* Try lowering COUNT to the number of scalar latch iterations. */
1901 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1902 widest_int max_iters;
1903 if (max_loop_iterations (loop, &max_iters)
1904 && max_iters < count)
1905 count = max_iters.to_shwi ();
1907 /* Try scales of 1 and the element size. */
1908 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1909 bool overflow_p = false;
1910 for (int i = 0; i < 2; ++i)
1912 int scale = scales[i];
1913 widest_int factor;
1914 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1915 continue;
1917 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1918 in OFFSET_BITS bits. */
1919 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1920 if (overflow_p)
1921 continue;
1922 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1923 if (wi::min_precision (range, sign) > element_bits)
1925 overflow_p = true;
1926 continue;
1929 /* See whether the target supports the operation. */
1930 tree memory_type = TREE_TYPE (DR_REF (dr));
1931 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1932 memory_type, element_bits, sign, scale,
1933 &gs_info->ifn, &gs_info->element_type))
1934 continue;
1936 tree offset_type = build_nonstandard_integer_type (element_bits,
1937 sign == UNSIGNED);
1939 gs_info->decl = NULL_TREE;
1940 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1941 but we don't need to store that here. */
1942 gs_info->base = NULL_TREE;
1943 gs_info->offset = fold_convert (offset_type, step);
1944 gs_info->offset_dt = vect_constant_def;
1945 gs_info->offset_vectype = NULL_TREE;
1946 gs_info->scale = scale;
1947 gs_info->memory_type = memory_type;
1948 return true;
1951 if (overflow_p && dump_enabled_p ())
1952 dump_printf_loc (MSG_NOTE, vect_location,
1953 "truncating gather/scatter offset to %d bits"
1954 " might change its value.\n", element_bits);
1956 return false;
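/* Illustrative sketch, not part of this file: when the step is a
   compile-time constant, the synthesized gather/scatter uses the offset
   vector { 0, X, X*2, ... } with X = DR_STEP / SCALE, and the check above
   guarantees that COUNT * X still fits in an element-sized offset.  A
   minimal plain-C model assuming 32-bit offsets; the helper name is made
   up for illustration.  */
#include <stdint.h>

static void
build_truncated_offsets (int32_t *offsets, unsigned int nelts,
                         int64_t step, int64_t scale)
{
  int64_t x = step / scale;		/* exact: wi::multiple_of_p checked this  */
  for (unsigned int i = 0; i < nelts; ++i)
    offsets[i] = (int32_t) (i * x);	/* valid only if (nelts-1)*x fits in 32 bits  */
}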
1959 /* Return true if we can use gather/scatter internal functions to
1960 vectorize STMT, which is a grouped or strided load or store.
1961 MASKED_P is true if load or store is conditional. When returning
1962 true, fill in GS_INFO with the information required to perform the
1963 operation. */
1965 static bool
1966 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
1967 bool masked_p,
1968 gather_scatter_info *gs_info)
1970 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1971 || gs_info->decl)
1972 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1973 masked_p, gs_info);
1975 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1976 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1977 tree offset_type = TREE_TYPE (gs_info->offset);
1978 unsigned int offset_bits = TYPE_PRECISION (offset_type);
1980 /* Enforced by vect_check_gather_scatter. */
1981 gcc_assert (element_bits >= offset_bits);
1983 /* If the elements are wider than the offset, convert the offset to the
1984 same width, without changing its sign. */
1985 if (element_bits > offset_bits)
1987 bool unsigned_p = TYPE_UNSIGNED (offset_type);
1988 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1989 gs_info->offset = fold_convert (offset_type, gs_info->offset);
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_NOTE, vect_location,
1994 "using gather/scatter for strided/grouped access,"
1995 " scale = %d\n", gs_info->scale);
1997 return true;
2000 /* STMT is a non-strided load or store, meaning that it accesses
2001 elements with a known constant step. Return -1 if that step
2002 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2004 static int
2005 compare_step_with_zero (gimple *stmt)
2007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2008 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2009 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2010 size_zero_node);
2013 /* If the target supports a permute mask that reverses the elements in
2014 a vector of type VECTYPE, return that mask, otherwise return null. */
2016 static tree
2017 perm_mask_for_reverse (tree vectype)
2019 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2021 /* The encoding has a single stepped pattern. */
2022 vec_perm_builder sel (nunits, 1, 3);
2023 for (int i = 0; i < 3; ++i)
2024 sel.quick_push (nunits - 1 - i);
2026 vec_perm_indices indices (sel, 1, nunits);
2027 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2028 return NULL_TREE;
2029 return vect_gen_perm_mask_checked (vectype, indices);
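/* Illustrative sketch, not part of this file: the selector built above is
   { N-1, N-2, ..., 1, 0 }, encoded as a single stepped pattern of three
   elements so that it also describes variable-length vectors.  For a fixed
   length the full selector is simply the following; the helper name is
   made up for illustration.  */
static void
reverse_selector (unsigned int *sel, unsigned int nunits)
{
  for (unsigned int i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;		/* e.g. {3, 2, 1, 0} for a 4-element vector  */
}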
2032 /* STMT is either a masked or unconditional store. Return the value
2033 being stored. */
2035 tree
2036 vect_get_store_rhs (gimple *stmt)
2038 if (gassign *assign = dyn_cast <gassign *> (stmt))
2040 gcc_assert (gimple_assign_single_p (assign));
2041 return gimple_assign_rhs1 (assign);
2043 if (gcall *call = dyn_cast <gcall *> (stmt))
2045 internal_fn ifn = gimple_call_internal_fn (call);
2046 int index = internal_fn_stored_value_index (ifn);
2047 gcc_assert (index >= 0);
2048 return gimple_call_arg (stmt, index);
2050 gcc_unreachable ();
2053 /* A subroutine of get_load_store_type, with a subset of the same
2054 arguments. Handle the case where STMT is part of a grouped load
2055 or store.
2057 For stores, the statements in the group are all consecutive
2058 and there is no gap at the end. For loads, the statements in the
2059 group might not be consecutive; there can be gaps between statements
2060 as well as at the end. */
2062 static bool
2063 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2064 bool masked_p, vec_load_store_type vls_type,
2065 vect_memory_access_type *memory_access_type,
2066 gather_scatter_info *gs_info)
2068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2069 vec_info *vinfo = stmt_info->vinfo;
2070 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2071 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2072 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
2073 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2074 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2075 bool single_element_p = (stmt == first_stmt
2076 && !GROUP_NEXT_ELEMENT (stmt_info));
2077 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
2078 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2080 /* True if the vectorized statements would access beyond the last
2081 statement in the group. */
2082 bool overrun_p = false;
2084 /* True if we can cope with such overrun by peeling for gaps, so that
2085 there is at least one final scalar iteration after the vector loop. */
2086 bool can_overrun_p = (!masked_p
2087 && vls_type == VLS_LOAD
2088 && loop_vinfo
2089 && !loop->inner);
2091 /* There can only be a gap at the end of the group if the stride is
2092 known at compile time. */
2093 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2095 /* Stores can't yet have gaps. */
2096 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2098 if (slp)
2100 if (STMT_VINFO_STRIDED_P (stmt_info))
2102 /* Try to use consecutive accesses of GROUP_SIZE elements,
2103 separated by the stride, until we have a complete vector.
2104 Fall back to scalar accesses if that isn't possible. */
2105 if (multiple_p (nunits, group_size))
2106 *memory_access_type = VMAT_STRIDED_SLP;
2107 else
2108 *memory_access_type = VMAT_ELEMENTWISE;
2110 else
2112 overrun_p = loop_vinfo && gap != 0;
2113 if (overrun_p && vls_type != VLS_LOAD)
2115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2116 "Grouped store with gaps requires"
2117 " non-consecutive accesses\n");
2118 return false;
2120 /* An overrun is fine if the trailing elements are smaller
2121 than the alignment boundary B. Every vector access will
2122 be a multiple of B and so we are guaranteed to access a
2123 non-gap element in the same B-sized block. */
2124 if (overrun_p
2125 && gap < (vect_known_alignment_in_bytes (first_dr)
2126 / vect_get_scalar_dr_size (first_dr)))
2127 overrun_p = false;
2128 if (overrun_p && !can_overrun_p)
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2132 "Peeling for outer loop is not supported\n");
2133 return false;
2135 *memory_access_type = VMAT_CONTIGUOUS;
2138 else
2140 /* We can always handle this case using elementwise accesses,
2141 but see if something more efficient is available. */
2142 *memory_access_type = VMAT_ELEMENTWISE;
2144 /* If there is a gap at the end of the group then these optimizations
2145 would access excess elements in the last iteration. */
2146 bool would_overrun_p = (gap != 0);
2147 /* An overrun is fine if the trailing elements are smaller than the
2148 alignment boundary B. Every vector access will be a multiple of B
2149 and so we are guaranteed to access a non-gap element in the
2150 same B-sized block. */
2151 if (would_overrun_p
2152 && !masked_p
2153 && gap < (vect_known_alignment_in_bytes (first_dr)
2154 / vect_get_scalar_dr_size (first_dr)))
2155 would_overrun_p = false;
2157 if (!STMT_VINFO_STRIDED_P (stmt_info)
2158 && (can_overrun_p || !would_overrun_p)
2159 && compare_step_with_zero (stmt) > 0)
2161 /* First cope with the degenerate case of a single-element
2162 vector. */
2163 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2164 *memory_access_type = VMAT_CONTIGUOUS;
2166 /* Otherwise try using LOAD/STORE_LANES. */
2167 if (*memory_access_type == VMAT_ELEMENTWISE
2168 && (vls_type == VLS_LOAD
2169 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2170 : vect_store_lanes_supported (vectype, group_size,
2171 masked_p)))
2173 *memory_access_type = VMAT_LOAD_STORE_LANES;
2174 overrun_p = would_overrun_p;
2177 /* If that fails, try using permuting loads. */
2178 if (*memory_access_type == VMAT_ELEMENTWISE
2179 && (vls_type == VLS_LOAD
2180 ? vect_grouped_load_supported (vectype, single_element_p,
2181 group_size)
2182 : vect_grouped_store_supported (vectype, group_size)))
2184 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2185 overrun_p = would_overrun_p;
2189 /* As a last resort, try using a gather load or scatter store.
2191 ??? Although the code can handle all group sizes correctly,
2192 it probably isn't a win to use separate strided accesses based
2193 on nearby locations. Or, even if it's a win over scalar code,
2194 it might not be a win over vectorizing at a lower VF, if that
2195 allows us to use contiguous accesses. */
2196 if (*memory_access_type == VMAT_ELEMENTWISE
2197 && single_element_p
2198 && loop_vinfo
2199 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2200 masked_p, gs_info))
2201 *memory_access_type = VMAT_GATHER_SCATTER;
2204 if (vls_type != VLS_LOAD && first_stmt == stmt)
2206 /* STMT is the leader of the group. Check the operands of all the
2207 stmts of the group. */
2208 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2209 while (next_stmt)
2211 tree op = vect_get_store_rhs (next_stmt);
2212 gimple *def_stmt;
2213 enum vect_def_type dt;
2214 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2218 "use not simple.\n");
2219 return false;
2221 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2225 if (overrun_p)
2227 gcc_assert (can_overrun_p);
2228 if (dump_enabled_p ())
2229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2230 "Data access with gaps requires scalar "
2231 "epilogue loop\n");
2232 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2235 return true;
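/* Illustrative sketch, not part of this file: the "overrun is fine" test
   above allows reading past the trailing gap of a group when the gap is
   smaller than one alignment boundary's worth of elements, because every
   vector access is then a multiple of that boundary and therefore touches
   at least one real (non-gap) element in the same aligned block.  A
   minimal plain-C model; the helper name is made up for illustration.  */
#include <stdbool.h>

static bool
trailing_gap_overrun_ok (unsigned int gap_elts, unsigned int align_bytes,
                         unsigned int elt_bytes)
{
  /* E.g. a gap of 1 four-byte element is safe for a 16-byte-aligned
     access: 1 < 16 / 4.  */
  return gap_elts < align_bytes / elt_bytes;
}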
2238 /* A subroutine of get_load_store_type, with a subset of the same
2239 arguments. Handle the case where STMT is a load or store that
2240 accesses consecutive elements with a negative step. */
2242 static vect_memory_access_type
2243 get_negative_load_store_type (gimple *stmt, tree vectype,
2244 vec_load_store_type vls_type,
2245 unsigned int ncopies)
2247 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2248 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2249 dr_alignment_support alignment_support_scheme;
2251 if (ncopies > 1)
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "multiple types with negative step.\n");
2256 return VMAT_ELEMENTWISE;
2259 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2260 if (alignment_support_scheme != dr_aligned
2261 && alignment_support_scheme != dr_unaligned_supported)
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2265 "negative step but alignment required.\n");
2266 return VMAT_ELEMENTWISE;
2269 if (vls_type == VLS_STORE_INVARIANT)
2271 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_NOTE, vect_location,
2273 "negative step with invariant source;"
2274 " no permute needed.\n");
2275 return VMAT_CONTIGUOUS_DOWN;
2278 if (!perm_mask_for_reverse (vectype))
2280 if (dump_enabled_p ())
2281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2282 "negative step and reversing not supported.\n");
2283 return VMAT_ELEMENTWISE;
2286 return VMAT_CONTIGUOUS_REVERSE;
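/* Illustrative sketch, not part of this file: VMAT_CONTIGUOUS_REVERSE
   means a negative-step access is implemented as one contiguous vector
   access at the lowest address of the block followed by an element
   reversal (see perm_mask_for_reverse).  A minimal plain-C model of the
   load case; the helper name is made up for illustration.  */
static void
reverse_load_model (int *vec_out, const int *p, unsigned int nunits)
{
  /* The scalar loop reads p[0], p[-1], p[-2], ...; the vector form loads
     the contiguous block p[-(nunits-1)] .. p[0] and then reverses it.  */
  const int *base = p - (nunits - 1);
  for (unsigned int i = 0; i < nunits; ++i)
    vec_out[i] = base[nunits - 1 - i];
}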
2289 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2290 if there is a memory access type that the vectorized form can use,
2291 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2292 or scatters, fill in GS_INFO accordingly.
2294 SLP says whether we're performing SLP rather than loop vectorization.
2295 MASKED_P is true if the statement is conditional on a vectorized mask.
2296 VECTYPE is the vector type that the vectorized statements will use.
2297 NCOPIES is the number of vector statements that will be needed. */
2299 static bool
2300 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2301 vec_load_store_type vls_type, unsigned int ncopies,
2302 vect_memory_access_type *memory_access_type,
2303 gather_scatter_info *gs_info)
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 vec_info *vinfo = stmt_info->vinfo;
2307 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2308 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2309 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2311 *memory_access_type = VMAT_GATHER_SCATTER;
2312 gimple *def_stmt;
2313 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2314 gcc_unreachable ();
2315 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2316 &gs_info->offset_dt,
2317 &gs_info->offset_vectype))
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 "%s index use not simple.\n",
2322 vls_type == VLS_LOAD ? "gather" : "scatter");
2323 return false;
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2328 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2329 memory_access_type, gs_info))
2330 return false;
2332 else if (STMT_VINFO_STRIDED_P (stmt_info))
2334 gcc_assert (!slp);
2335 if (loop_vinfo
2336 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2337 masked_p, gs_info))
2338 *memory_access_type = VMAT_GATHER_SCATTER;
2339 else
2340 *memory_access_type = VMAT_ELEMENTWISE;
2342 else
2344 int cmp = compare_step_with_zero (stmt);
2345 if (cmp < 0)
2346 *memory_access_type = get_negative_load_store_type
2347 (stmt, vectype, vls_type, ncopies);
2348 else if (cmp == 0)
2350 gcc_assert (vls_type == VLS_LOAD);
2351 *memory_access_type = VMAT_INVARIANT;
2353 else
2354 *memory_access_type = VMAT_CONTIGUOUS;
2357 if ((*memory_access_type == VMAT_ELEMENTWISE
2358 || *memory_access_type == VMAT_STRIDED_SLP)
2359 && !nunits.is_constant ())
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2363 "Not using elementwise accesses due to variable "
2364 "vectorization factor.\n");
2365 return false;
2368 /* FIXME: At the moment the cost model seems to underestimate the
2369 cost of using elementwise accesses. This check preserves the
2370 traditional behavior until that can be fixed. */
2371 if (*memory_access_type == VMAT_ELEMENTWISE
2372 && !STMT_VINFO_STRIDED_P (stmt_info)
2373 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2374 && !GROUP_NEXT_ELEMENT (stmt_info)
2375 && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "not falling back to elementwise accesses\n");
2380 return false;
2382 return true;
2385 /* Return true if boolean argument MASK is suitable for vectorizing
2386 conditional load or store STMT. When returning true, store the type
2387 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2388 in *MASK_VECTYPE_OUT. */
2390 static bool
2391 vect_check_load_store_mask (gimple *stmt, tree mask,
2392 vect_def_type *mask_dt_out,
2393 tree *mask_vectype_out)
2395 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "mask argument is not a boolean.\n");
2400 return false;
2403 if (TREE_CODE (mask) != SSA_NAME)
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "mask argument is not an SSA name.\n");
2408 return false;
2411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2412 gimple *def_stmt;
2413 enum vect_def_type mask_dt;
2414 tree mask_vectype;
2415 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2416 &mask_vectype))
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "mask use not simple.\n");
2421 return false;
2424 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2425 if (!mask_vectype)
2426 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2428 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "could not find an appropriate vector mask type.\n");
2433 return false;
2436 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2437 TYPE_VECTOR_SUBPARTS (vectype)))
2439 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "vector mask type ");
2443 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2444 dump_printf (MSG_MISSED_OPTIMIZATION,
2445 " does not match vector data type ");
2446 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2447 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2449 return false;
2452 *mask_dt_out = mask_dt;
2453 *mask_vectype_out = mask_vectype;
2454 return true;
2457 /* Return true if stored value RHS is suitable for vectorizing store
2458 statement STMT. When returning true, store the type of the
2459 definition in *RHS_DT_OUT, the type of the vectorized store value in
2460 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2462 static bool
2463 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2464 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2466 /* In the case this is a store from a constant make sure
2467 native_encode_expr can handle it. */
2468 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2472 "cannot encode constant as a byte sequence.\n");
2473 return false;
2476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2477 gimple *def_stmt;
2478 enum vect_def_type rhs_dt;
2479 tree rhs_vectype;
2480 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2481 &rhs_vectype))
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2485 "use not simple.\n");
2486 return false;
2489 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2490 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2494 "incompatible vector types.\n");
2495 return false;
2498 *rhs_dt_out = rhs_dt;
2499 *rhs_vectype_out = rhs_vectype;
2500 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2501 *vls_type_out = VLS_STORE_INVARIANT;
2502 else
2503 *vls_type_out = VLS_STORE;
2504 return true;
2507 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2508 Note that we support masks with floating-point type, in which case the
2509 floats are interpreted as a bitmask. */
2511 static tree
2512 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2514 if (TREE_CODE (masktype) == INTEGER_TYPE)
2515 return build_int_cst (masktype, -1);
2516 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2518 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2519 mask = build_vector_from_val (masktype, mask);
2520 return vect_init_vector (stmt, mask, masktype, NULL);
2522 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2524 REAL_VALUE_TYPE r;
2525 long tmp[6];
2526 for (int j = 0; j < 6; ++j)
2527 tmp[j] = -1;
2528 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2529 tree mask = build_real (TREE_TYPE (masktype), r);
2530 mask = build_vector_from_val (masktype, mask);
2531 return vect_init_vector (stmt, mask, masktype, NULL);
2533 gcc_unreachable ();
2536 /* Build an all-zero merge value of type VECTYPE while vectorizing
2537 STMT as a gather load. */
2539 static tree
2540 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2542 tree merge;
2543 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2544 merge = build_int_cst (TREE_TYPE (vectype), 0);
2545 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2547 REAL_VALUE_TYPE r;
2548 long tmp[6];
2549 for (int j = 0; j < 6; ++j)
2550 tmp[j] = 0;
2551 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2552 merge = build_real (TREE_TYPE (vectype), r);
2554 else
2555 gcc_unreachable ();
2556 merge = build_vector_from_val (vectype, merge);
2557 return vect_init_vector (stmt, merge, vectype, NULL);
2560 /* Build a gather load call while vectorizing STMT. Insert new instructions
2561 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2562 operation. If the load is conditional, MASK is the unvectorized
2563 condition and MASK_DT is its definition type, otherwise MASK is null. */
2565 static void
2566 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2567 gimple **vec_stmt, gather_scatter_info *gs_info,
2568 tree mask, vect_def_type mask_dt)
2570 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2572 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2573 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2574 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2575 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2576 edge pe = loop_preheader_edge (loop);
2577 enum { NARROW, NONE, WIDEN } modifier;
2578 poly_uint64 gather_off_nunits
2579 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2581 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2582 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2583 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2584 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2585 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2586 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2587 tree scaletype = TREE_VALUE (arglist);
2588 gcc_checking_assert (types_compatible_p (srctype, rettype)
2589 && (!mask || types_compatible_p (srctype, masktype)));
2591 tree perm_mask = NULL_TREE;
2592 tree mask_perm_mask = NULL_TREE;
2593 if (known_eq (nunits, gather_off_nunits))
2594 modifier = NONE;
2595 else if (known_eq (nunits * 2, gather_off_nunits))
2597 modifier = WIDEN;
2599 /* Currently widening gathers and scatters are only supported for
2600 fixed-length vectors. */
2601 int count = gather_off_nunits.to_constant ();
2602 vec_perm_builder sel (count, count, 1);
2603 for (int i = 0; i < count; ++i)
2604 sel.quick_push (i | (count / 2));
2606 vec_perm_indices indices (sel, 1, count);
2607 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2608 indices);
2610 else if (known_eq (nunits, gather_off_nunits * 2))
2612 modifier = NARROW;
2614 /* Currently narrowing gathers and scatters are only supported for
2615 fixed-length vectors. */
2616 int count = nunits.to_constant ();
2617 vec_perm_builder sel (count, count, 1);
2618 sel.quick_grow (count);
2619 for (int i = 0; i < count; ++i)
2620 sel[i] = i < count / 2 ? i : i + count / 2;
2621 vec_perm_indices indices (sel, 2, count);
2622 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2624 ncopies *= 2;
2626 if (mask)
2628 for (int i = 0; i < count; ++i)
2629 sel[i] = i | (count / 2);
2630 indices.new_vector (sel, 2, count);
2631 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2634 else
2635 gcc_unreachable ();
2637 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2638 vectype);
2640 tree ptr = fold_convert (ptrtype, gs_info->base);
2641 if (!is_gimple_min_invariant (ptr))
2643 gimple_seq seq;
2644 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2645 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2646 gcc_assert (!new_bb);
2649 tree scale = build_int_cst (scaletype, gs_info->scale);
2651 tree vec_oprnd0 = NULL_TREE;
2652 tree vec_mask = NULL_TREE;
2653 tree src_op = NULL_TREE;
2654 tree mask_op = NULL_TREE;
2655 tree prev_res = NULL_TREE;
2656 stmt_vec_info prev_stmt_info = NULL;
2658 if (!mask)
2660 src_op = vect_build_zero_merge_argument (stmt, rettype);
2661 mask_op = vect_build_all_ones_mask (stmt, masktype);
2664 for (int j = 0; j < ncopies; ++j)
2666 tree op, var;
2667 gimple *new_stmt;
2668 if (modifier == WIDEN && (j & 1))
2669 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2670 perm_mask, stmt, gsi);
2671 else if (j == 0)
2672 op = vec_oprnd0
2673 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2674 else
2675 op = vec_oprnd0
2676 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2678 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2680 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2681 TYPE_VECTOR_SUBPARTS (idxtype)));
2682 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2683 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2684 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2685 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2686 op = var;
2689 if (mask)
2691 if (mask_perm_mask && (j & 1))
2692 mask_op = permute_vec_elements (mask_op, mask_op,
2693 mask_perm_mask, stmt, gsi);
2694 else
2696 if (j == 0)
2697 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2698 else
2699 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2701 mask_op = vec_mask;
2702 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2704 gcc_assert
2705 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2706 TYPE_VECTOR_SUBPARTS (masktype)));
2707 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2708 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2709 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2710 mask_op);
2711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2712 mask_op = var;
2715 src_op = mask_op;
2718 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2719 mask_op, scale);
2721 if (!useless_type_conversion_p (vectype, rettype))
2723 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2724 TYPE_VECTOR_SUBPARTS (rettype)));
2725 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2726 gimple_call_set_lhs (new_stmt, op);
2727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 var = make_ssa_name (vec_dest);
2729 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2730 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2732 else
2734 var = make_ssa_name (vec_dest, new_stmt);
2735 gimple_call_set_lhs (new_stmt, var);
2738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2740 if (modifier == NARROW)
2742 if ((j & 1) == 0)
2744 prev_res = var;
2745 continue;
2747 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2748 new_stmt = SSA_NAME_DEF_STMT (var);
2751 if (prev_stmt_info == NULL)
2752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2753 else
2754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2755 prev_stmt_info = vinfo_for_stmt (new_stmt);
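/* Illustrative sketch, not part of this file: the gather builtin emitted
   above behaves lane-wise as below, with the merge operand (all zeros for
   an unconditional load) supplying the value of lanes whose mask element
   is clear.  A minimal plain-C model assuming 32-bit data and offsets; the
   helper name is made up for illustration.  */
#include <stdint.h>
#include <stdbool.h>

static void
gather_load_model (int32_t *dst, const int32_t *merge, const char *base,
                   const int32_t *offsets, int scale, const bool *mask,
                   unsigned int nunits)
{
  for (unsigned int i = 0; i < nunits; ++i)
    dst[i] = mask[i]
             ? *(const int32_t *) (base + (intptr_t) offsets[i] * scale)
             : merge[i];
}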
2759 /* Prepare the base and offset in GS_INFO for vectorization.
2760 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2761 to the vectorized offset argument for the first copy of STMT. STMT
2762 is the statement described by GS_INFO and LOOP is the containing loop. */
2764 static void
2765 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2766 gather_scatter_info *gs_info,
2767 tree *dataref_ptr, tree *vec_offset)
2769 gimple_seq stmts = NULL;
2770 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2771 if (stmts != NULL)
2773 basic_block new_bb;
2774 edge pe = loop_preheader_edge (loop);
2775 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2776 gcc_assert (!new_bb);
2778 tree offset_type = TREE_TYPE (gs_info->offset);
2779 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2780 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2781 offset_vectype);
2784 /* Prepare to implement a grouped or strided load or store using
2785 the gather load or scatter store operation described by GS_INFO.
2786 STMT is the load or store statement.
2788 Set *DATAREF_BUMP to the amount that should be added to the base
2789 address after each copy of the vectorized statement. Set *VEC_OFFSET
2790 to an invariant offset vector in which element I has the value
2791 I * DR_STEP / SCALE. */
2793 static void
2794 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2795 gather_scatter_info *gs_info,
2796 tree *dataref_bump, tree *vec_offset)
2798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2799 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2800 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2801 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2802 gimple_seq stmts;
2804 tree bump = size_binop (MULT_EXPR,
2805 fold_convert (sizetype, DR_STEP (dr)),
2806 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2807 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2808 if (stmts)
2809 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2811 /* The offset given in GS_INFO can have pointer type, so use the element
2812 type of the vector instead. */
2813 tree offset_type = TREE_TYPE (gs_info->offset);
2814 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2815 offset_type = TREE_TYPE (offset_vectype);
2817 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2818 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2819 ssize_int (gs_info->scale));
2820 step = fold_convert (offset_type, step);
2821 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2823 /* Create {0, X, X*2, X*3, ...}. */
2824 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2825 build_zero_cst (offset_type), step);
2826 if (stmts)
2827 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
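/* Illustrative sketch, not part of this file: each copy of a strided
   access turned into a gather/scatter advances the base by
   DR_STEP * NUNITS (*DATAREF_BUMP) and uses the invariant offset vector
   { 0, X, X*2, ... } with X = DR_STEP / SCALE (*VEC_OFFSET), so lane I of
   copy J addresses base + (J * NUNITS + I) * DR_STEP.  A minimal plain-C
   model; the helper name is made up for illustration.  */
#include <stdint.h>

static intptr_t
strided_lane_address (intptr_t base, int64_t dr_step, int64_t scale,
                      unsigned int nunits, unsigned int copy,
                      unsigned int lane)
{
  intptr_t dataref_bump = (intptr_t) (dr_step * nunits);	/* *DATAREF_BUMP  */
  int64_t x = dr_step / scale;					/* exact by construction  */
  int64_t lane_offset = (int64_t) lane * x;			/* element of *VEC_OFFSET  */
  return base + (intptr_t) copy * dataref_bump
	 + (intptr_t) (lane_offset * scale);
}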
2830 /* Return the amount that should be added to a vector pointer to move
2831 to the next or previous copy of AGGR_TYPE. DR is the data reference
2832 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2833 vectorization. */
2835 static tree
2836 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2837 vect_memory_access_type memory_access_type)
2839 if (memory_access_type == VMAT_INVARIANT)
2840 return size_zero_node;
2842 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2843 tree step = vect_dr_behavior (dr)->step;
2844 if (tree_int_cst_sgn (step) == -1)
2845 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2846 return iv_step;
2849 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2851 static bool
2852 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2853 gimple **vec_stmt, slp_tree slp_node,
2854 tree vectype_in, enum vect_def_type *dt)
2856 tree op, vectype;
2857 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2858 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2859 unsigned ncopies;
2860 unsigned HOST_WIDE_INT nunits, num_bytes;
2862 op = gimple_call_arg (stmt, 0);
2863 vectype = STMT_VINFO_VECTYPE (stmt_info);
2865 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2866 return false;
2868 /* Multiple types in SLP are handled by creating the appropriate number of
2869 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2870 case of SLP. */
2871 if (slp_node)
2872 ncopies = 1;
2873 else
2874 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2876 gcc_assert (ncopies >= 1);
2878 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2879 if (! char_vectype)
2880 return false;
2882 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2883 return false;
2885 unsigned word_bytes = num_bytes / nunits;
2887 /* The encoding uses one stepped pattern for each byte in the word. */
2888 vec_perm_builder elts (num_bytes, word_bytes, 3);
2889 for (unsigned i = 0; i < 3; ++i)
2890 for (unsigned j = 0; j < word_bytes; ++j)
2891 elts.quick_push ((i + 1) * word_bytes - j - 1);
2893 vec_perm_indices indices (elts, 1, num_bytes);
2894 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2895 return false;
2897 if (! vec_stmt)
2899 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2900 if (dump_enabled_p ())
2901 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2902 "\n");
2903 if (! slp_node)
2905 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2906 1, vector_stmt, stmt_info, 0, vect_prologue);
2907 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2908 ncopies, vec_perm, stmt_info, 0, vect_body);
2910 return true;
2913 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2915 /* Transform. */
2916 vec<tree> vec_oprnds = vNULL;
2917 gimple *new_stmt = NULL;
2918 stmt_vec_info prev_stmt_info = NULL;
2919 for (unsigned j = 0; j < ncopies; j++)
2921 /* Handle uses. */
2922 if (j == 0)
2923 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2924 else
2925 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2927 /* Arguments are ready. Create the new vector stmt. */
2928 unsigned i;
2929 tree vop;
2930 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2932 tree tem = make_ssa_name (char_vectype);
2933 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2934 char_vectype, vop));
2935 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2936 tree tem2 = make_ssa_name (char_vectype);
2937 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2938 tem, tem, bswap_vconst);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 tem = make_ssa_name (vectype);
2941 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2942 vectype, tem2));
2943 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2944 if (slp_node)
2945 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2948 if (slp_node)
2949 continue;
2951 if (j == 0)
2952 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2953 else
2954 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2956 prev_stmt_info = vinfo_for_stmt (new_stmt);
2959 vec_oprnds.release ();
2960 return true;
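/* Illustrative sketch, not part of this file: the permutation built above
   reverses the bytes inside each WORD_BYTES-sized element of the vector,
   which is __builtin_bswap applied lane-wise via a byte-level
   VEC_PERM_EXPR.  A minimal plain-C model of the selector and its effect;
   the helper name is made up for illustration.  */
#include <stdint.h>

static void
bswap_permute_model (uint8_t *out, const uint8_t *in,
                     unsigned int num_bytes, unsigned int word_bytes)
{
  for (unsigned int i = 0; i < num_bytes; ++i)
    {
      unsigned int word = i / word_bytes;
      unsigned int j = i % word_bytes;
      /* Same formula as the quick_push above; for word_bytes == 4 the
	 selector is {3,2,1,0, 7,6,5,4, ...}.  */
      out[i] = in[(word + 1) * word_bytes - j - 1];
    }
}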
2963 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2964 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2965 in a single step. On success, store the binary pack code in
2966 *CONVERT_CODE. */
2968 static bool
2969 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2970 tree_code *convert_code)
2972 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2973 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2974 return false;
2976 tree_code code;
2977 int multi_step_cvt = 0;
2978 auto_vec <tree, 8> interm_types;
2979 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2980 &code, &multi_step_cvt,
2981 &interm_types)
2982 || multi_step_cvt)
2983 return false;
2985 *convert_code = code;
2986 return true;
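/* Illustrative sketch, not part of this file: "simple integer narrowing"
   means the wider call results can be converted to the output vector type
   in a single pack step, i.e. two vectors of N wide elements become one
   vector of 2*N narrow elements.  A minimal plain-C model of such a
   truncating pack (the lane order of the real pack operation is
   target-dependent); the helper name is made up for illustration.  */
#include <stdint.h>

static void
pack_trunc_model (int16_t *out, const int32_t *lo, const int32_t *hi,
                  unsigned int n)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      out[i] = (int16_t) lo[i];		/* truncate the first input vector  */
      out[n + i] = (int16_t) hi[i];	/* truncate the second input vector  */
    }
}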
2989 /* Function vectorizable_call.
2991 Check if GS performs a function call that can be vectorized.
2992 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2993 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2994 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2996 static bool
2997 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2998 slp_tree slp_node)
3000 gcall *stmt;
3001 tree vec_dest;
3002 tree scalar_dest;
3003 tree op, type;
3004 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3005 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3006 tree vectype_out, vectype_in;
3007 poly_uint64 nunits_in;
3008 poly_uint64 nunits_out;
3009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3010 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3011 vec_info *vinfo = stmt_info->vinfo;
3012 tree fndecl, new_temp, rhs_type;
3013 gimple *def_stmt;
3014 enum vect_def_type dt[3]
3015 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3016 int ndts = 3;
3017 gimple *new_stmt = NULL;
3018 int ncopies, j;
3019 vec<tree> vargs = vNULL;
3020 enum { NARROW, NONE, WIDEN } modifier;
3021 size_t i, nargs;
3022 tree lhs;
3024 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3025 return false;
3027 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3028 && ! vec_stmt)
3029 return false;
3031 /* Is GS a vectorizable call? */
3032 stmt = dyn_cast <gcall *> (gs);
3033 if (!stmt)
3034 return false;
3036 if (gimple_call_internal_p (stmt)
3037 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3038 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3039 /* Handled by vectorizable_load and vectorizable_store. */
3040 return false;
3042 if (gimple_call_lhs (stmt) == NULL_TREE
3043 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3044 return false;
3046 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3048 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3050 /* Process function arguments. */
3051 rhs_type = NULL_TREE;
3052 vectype_in = NULL_TREE;
3053 nargs = gimple_call_num_args (stmt);
3055 /* Bail out if the function has more than three arguments; we do not have
3056 interesting builtin functions to vectorize with more than two arguments,
3057 except for fma. Calls with no arguments are not handled either. */
3058 if (nargs == 0 || nargs > 3)
3059 return false;
3061 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
3062 if (gimple_call_internal_p (stmt)
3063 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3065 nargs = 0;
3066 rhs_type = unsigned_type_node;
3069 for (i = 0; i < nargs; i++)
3071 tree opvectype;
3073 op = gimple_call_arg (stmt, i);
3075 /* We can only handle calls with arguments of the same type. */
3076 if (rhs_type
3077 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3079 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3081 "argument types differ.\n");
3082 return false;
3084 if (!rhs_type)
3085 rhs_type = TREE_TYPE (op);
3087 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3089 if (dump_enabled_p ())
3090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3091 "use not simple.\n");
3092 return false;
3095 if (!vectype_in)
3096 vectype_in = opvectype;
3097 else if (opvectype
3098 && opvectype != vectype_in)
3100 if (dump_enabled_p ())
3101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3102 "argument vector types differ.\n");
3103 return false;
3106 /* If all arguments are external or constant defs use a vector type with
3107 the same size as the output vector type. */
3108 if (!vectype_in)
3109 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3110 if (vec_stmt)
3111 gcc_assert (vectype_in);
3112 if (!vectype_in)
3114 if (dump_enabled_p ())
3116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3117 "no vectype for scalar type ");
3118 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3119 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3122 return false;
3125 /* FORNOW */
3126 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3127 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3128 if (known_eq (nunits_in * 2, nunits_out))
3129 modifier = NARROW;
3130 else if (known_eq (nunits_out, nunits_in))
3131 modifier = NONE;
3132 else if (known_eq (nunits_out * 2, nunits_in))
3133 modifier = WIDEN;
3134 else
3135 return false;
3137 /* We only handle functions that do not read or clobber memory. */
3138 if (gimple_vuse (stmt))
3140 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3142 "function reads from or writes to memory.\n");
3143 return false;
3146 /* For now, we only vectorize functions if a target specific builtin
3147 is available. TODO -- in some cases, it might be profitable to
3148 insert the calls for pieces of the vector, in order to be able
3149 to vectorize other operations in the loop. */
3150 fndecl = NULL_TREE;
3151 internal_fn ifn = IFN_LAST;
3152 combined_fn cfn = gimple_call_combined_fn (stmt);
3153 tree callee = gimple_call_fndecl (stmt);
3155 /* First try using an internal function. */
3156 tree_code convert_code = ERROR_MARK;
3157 if (cfn != CFN_LAST
3158 && (modifier == NONE
3159 || (modifier == NARROW
3160 && simple_integer_narrowing (vectype_out, vectype_in,
3161 &convert_code))))
3162 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3163 vectype_in);
3165 /* If that fails, try asking for a target-specific built-in function. */
3166 if (ifn == IFN_LAST)
3168 if (cfn != CFN_LAST)
3169 fndecl = targetm.vectorize.builtin_vectorized_function
3170 (cfn, vectype_out, vectype_in);
3171 else if (callee)
3172 fndecl = targetm.vectorize.builtin_md_vectorized_function
3173 (callee, vectype_out, vectype_in);
3176 if (ifn == IFN_LAST && !fndecl)
3178 if (cfn == CFN_GOMP_SIMD_LANE
3179 && !slp_node
3180 && loop_vinfo
3181 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3182 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3183 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3184 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3186 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3187 { 0, 1, 2, ... vf - 1 } vector. */
3188 gcc_assert (nargs == 0);
3190 else if (modifier == NONE
3191 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3192 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3193 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3194 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3195 vectype_in, dt);
3196 else
3198 if (dump_enabled_p ())
3199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3200 "function is not vectorizable.\n");
3201 return false;
3205 if (slp_node)
3206 ncopies = 1;
3207 else if (modifier == NARROW && ifn == IFN_LAST)
3208 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3209 else
3210 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3212 /* Sanity check: make sure that at least one copy of the vectorized stmt
3213 needs to be generated. */
3214 gcc_assert (ncopies >= 1);
3216 if (!vec_stmt) /* transformation not required. */
3218 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3221 "\n");
3222 if (!slp_node)
3224 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
3225 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3226 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3227 vec_promote_demote, stmt_info, 0, vect_body);
3230 return true;
3233 /* Transform. */
3235 if (dump_enabled_p ())
3236 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3238 /* Handle def. */
3239 scalar_dest = gimple_call_lhs (stmt);
3240 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3242 prev_stmt_info = NULL;
3243 if (modifier == NONE || ifn != IFN_LAST)
3245 tree prev_res = NULL_TREE;
3246 for (j = 0; j < ncopies; ++j)
3248 /* Build argument list for the vectorized call. */
3249 if (j == 0)
3250 vargs.create (nargs);
3251 else
3252 vargs.truncate (0);
3254 if (slp_node)
3256 auto_vec<vec<tree> > vec_defs (nargs);
3257 vec<tree> vec_oprnds0;
3259 for (i = 0; i < nargs; i++)
3260 vargs.quick_push (gimple_call_arg (stmt, i));
3261 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3262 vec_oprnds0 = vec_defs[0];
3264 /* Arguments are ready. Create the new vector stmt. */
3265 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3267 size_t k;
3268 for (k = 0; k < nargs; k++)
3270 vec<tree> vec_oprndsk = vec_defs[k];
3271 vargs[k] = vec_oprndsk[i];
3273 if (modifier == NARROW)
3275 tree half_res = make_ssa_name (vectype_in);
3276 gcall *call
3277 = gimple_build_call_internal_vec (ifn, vargs);
3278 gimple_call_set_lhs (call, half_res);
3279 gimple_call_set_nothrow (call, true);
3280 new_stmt = call;
3281 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3282 if ((i & 1) == 0)
3284 prev_res = half_res;
3285 continue;
3287 new_temp = make_ssa_name (vec_dest);
3288 new_stmt = gimple_build_assign (new_temp, convert_code,
3289 prev_res, half_res);
3291 else
3293 gcall *call;
3294 if (ifn != IFN_LAST)
3295 call = gimple_build_call_internal_vec (ifn, vargs);
3296 else
3297 call = gimple_build_call_vec (fndecl, vargs);
3298 new_temp = make_ssa_name (vec_dest, call);
3299 gimple_call_set_lhs (call, new_temp);
3300 gimple_call_set_nothrow (call, true);
3301 new_stmt = call;
3303 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3304 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3307 for (i = 0; i < nargs; i++)
3309 vec<tree> vec_oprndsi = vec_defs[i];
3310 vec_oprndsi.release ();
3312 continue;
3315 for (i = 0; i < nargs; i++)
3317 op = gimple_call_arg (stmt, i);
3318 if (j == 0)
3319 vec_oprnd0
3320 = vect_get_vec_def_for_operand (op, stmt);
3321 else
3323 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3324 vec_oprnd0
3325 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3328 vargs.quick_push (vec_oprnd0);
3331 if (gimple_call_internal_p (stmt)
3332 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3334 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3335 tree new_var
3336 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3337 gimple *init_stmt = gimple_build_assign (new_var, cst);
3338 vect_init_vector_1 (stmt, init_stmt, NULL);
3339 new_temp = make_ssa_name (vec_dest);
3340 new_stmt = gimple_build_assign (new_temp, new_var);
3342 else if (modifier == NARROW)
3344 tree half_res = make_ssa_name (vectype_in);
3345 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3346 gimple_call_set_lhs (call, half_res);
3347 gimple_call_set_nothrow (call, true);
3348 new_stmt = call;
3349 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3350 if ((j & 1) == 0)
3352 prev_res = half_res;
3353 continue;
3355 new_temp = make_ssa_name (vec_dest);
3356 new_stmt = gimple_build_assign (new_temp, convert_code,
3357 prev_res, half_res);
3359 else
3361 gcall *call;
3362 if (ifn != IFN_LAST)
3363 call = gimple_build_call_internal_vec (ifn, vargs);
3364 else
3365 call = gimple_build_call_vec (fndecl, vargs);
3366 new_temp = make_ssa_name (vec_dest, new_stmt);
3367 gimple_call_set_lhs (call, new_temp);
3368 gimple_call_set_nothrow (call, true);
3369 new_stmt = call;
3371 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3373 if (j == (modifier == NARROW ? 1 : 0))
3374 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3375 else
3376 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3378 prev_stmt_info = vinfo_for_stmt (new_stmt);
3381 else if (modifier == NARROW)
3383 for (j = 0; j < ncopies; ++j)
3385 /* Build argument list for the vectorized call. */
3386 if (j == 0)
3387 vargs.create (nargs * 2);
3388 else
3389 vargs.truncate (0);
3391 if (slp_node)
3393 auto_vec<vec<tree> > vec_defs (nargs);
3394 vec<tree> vec_oprnds0;
3396 for (i = 0; i < nargs; i++)
3397 vargs.quick_push (gimple_call_arg (stmt, i));
3398 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3399 vec_oprnds0 = vec_defs[0];
3401 /* Arguments are ready. Create the new vector stmt. */
3402 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3404 size_t k;
3405 vargs.truncate (0);
3406 for (k = 0; k < nargs; k++)
3408 vec<tree> vec_oprndsk = vec_defs[k];
3409 vargs.quick_push (vec_oprndsk[i]);
3410 vargs.quick_push (vec_oprndsk[i + 1]);
3412 gcall *call;
3413 if (ifn != IFN_LAST)
3414 call = gimple_build_call_internal_vec (ifn, vargs);
3415 else
3416 call = gimple_build_call_vec (fndecl, vargs);
3417 new_temp = make_ssa_name (vec_dest, call);
3418 gimple_call_set_lhs (call, new_temp);
3419 gimple_call_set_nothrow (call, true);
3420 new_stmt = call;
3421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3422 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3425 for (i = 0; i < nargs; i++)
3427 vec<tree> vec_oprndsi = vec_defs[i];
3428 vec_oprndsi.release ();
3430 continue;
3433 for (i = 0; i < nargs; i++)
3435 op = gimple_call_arg (stmt, i);
3436 if (j == 0)
3438 vec_oprnd0
3439 = vect_get_vec_def_for_operand (op, stmt);
3440 vec_oprnd1
3441 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3443 else
3445 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3446 vec_oprnd0
3447 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3448 vec_oprnd1
3449 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3452 vargs.quick_push (vec_oprnd0);
3453 vargs.quick_push (vec_oprnd1);
3456 new_stmt = gimple_build_call_vec (fndecl, vargs);
3457 new_temp = make_ssa_name (vec_dest, new_stmt);
3458 gimple_call_set_lhs (new_stmt, new_temp);
3459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3461 if (j == 0)
3462 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3463 else
3464 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3466 prev_stmt_info = vinfo_for_stmt (new_stmt);
3469 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3471 else
3472 /* No current target implements this case. */
3473 return false;
3475 vargs.release ();
3477 /* The call in STMT might prevent it from being removed in dce.
3478 We cannot remove it here, however, because of the way the ssa name
3479 it defines is mapped to the new definition. So just replace the
3480 rhs of the statement with something harmless. */
3482 if (slp_node)
3483 return true;
3485 type = TREE_TYPE (scalar_dest);
3486 if (is_pattern_stmt_p (stmt_info))
3487 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3488 else
3489 lhs = gimple_call_lhs (stmt);
3491 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3492 set_vinfo_for_stmt (new_stmt, stmt_info);
3493 set_vinfo_for_stmt (stmt, NULL);
3494 STMT_VINFO_STMT (stmt_info) = new_stmt;
3495 gsi_replace (gsi, new_stmt, false);
3497 return true;
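/* Illustrative sketch, not part of this file: one special case handled
   above is IFN_GOMP_SIMD_LANE, which is vectorized by materializing the
   constant lane-index vector { J*NUNITS, J*NUNITS+1, ..., J*NUNITS+NUNITS-1 }
   for copy J (see the build_index_vector call).  A minimal plain-C model;
   the helper name is made up for illustration.  */
static void
simd_lane_index_model (int *lanes, unsigned int nunits, unsigned int j)
{
  for (unsigned int i = 0; i < nunits; ++i)
    lanes[i] = (int) (j * nunits + i);	/* lane index within the whole copy  */
}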
3501 struct simd_call_arg_info
3503 tree vectype;
3504 tree op;
3505 HOST_WIDE_INT linear_step;
3506 enum vect_def_type dt;
3507 unsigned int align;
3508 bool simd_lane_linear;
3511 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3512 is linear within a simd lane (but not within the whole loop), note it in
3513 *ARGINFO. */
3515 static void
3516 vect_simd_lane_linear (tree op, struct loop *loop,
3517 struct simd_call_arg_info *arginfo)
3519 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3521 if (!is_gimple_assign (def_stmt)
3522 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3523 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3524 return;
3526 tree base = gimple_assign_rhs1 (def_stmt);
3527 HOST_WIDE_INT linear_step = 0;
3528 tree v = gimple_assign_rhs2 (def_stmt);
3529 while (TREE_CODE (v) == SSA_NAME)
3531 tree t;
3532 def_stmt = SSA_NAME_DEF_STMT (v);
3533 if (is_gimple_assign (def_stmt))
3534 switch (gimple_assign_rhs_code (def_stmt))
3536 case PLUS_EXPR:
3537 t = gimple_assign_rhs2 (def_stmt);
3538 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3539 return;
3540 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3541 v = gimple_assign_rhs1 (def_stmt);
3542 continue;
3543 case MULT_EXPR:
3544 t = gimple_assign_rhs2 (def_stmt);
3545 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3546 return;
3547 linear_step = tree_to_shwi (t);
3548 v = gimple_assign_rhs1 (def_stmt);
3549 continue;
3550 CASE_CONVERT:
3551 t = gimple_assign_rhs1 (def_stmt);
3552 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3553 || (TYPE_PRECISION (TREE_TYPE (v))
3554 < TYPE_PRECISION (TREE_TYPE (t))))
3555 return;
3556 if (!linear_step)
3557 linear_step = 1;
3558 v = t;
3559 continue;
3560 default:
3561 return;
3563 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3564 && loop->simduid
3565 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3566 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3567 == loop->simduid))
3569 if (!linear_step)
3570 linear_step = 1;
3571 arginfo->linear_step = linear_step;
3572 arginfo->op = base;
3573 arginfo->simd_lane_linear = true;
3574 return;
3579 /* Return the number of elements in vector type VECTYPE, which is associated
3580 with a SIMD clone. At present these vectors always have a constant
3581 length. */
3583 static unsigned HOST_WIDE_INT
3584 simd_clone_subparts (tree vectype)
3586 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3589 /* Function vectorizable_simd_clone_call.
3591 Check if STMT performs a function call that can be vectorized
3592 by calling a simd clone of the function.
3593 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3594 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3595 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3597 static bool
3598 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3599 gimple **vec_stmt, slp_tree slp_node)
3601 tree vec_dest;
3602 tree scalar_dest;
3603 tree op, type;
3604 tree vec_oprnd0 = NULL_TREE;
3605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3606 tree vectype;
3607 unsigned int nunits;
3608 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3609 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3610 vec_info *vinfo = stmt_info->vinfo;
3611 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3612 tree fndecl, new_temp;
3613 gimple *def_stmt;
3614 gimple *new_stmt = NULL;
3615 int ncopies, j;
3616 auto_vec<simd_call_arg_info> arginfo;
3617 vec<tree> vargs = vNULL;
3618 size_t i, nargs;
3619 tree lhs, rtype, ratype;
3620 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3622 /* Is STMT a vectorizable call? */
3623 if (!is_gimple_call (stmt))
3624 return false;
3626 fndecl = gimple_call_fndecl (stmt);
3627 if (fndecl == NULL_TREE)
3628 return false;
3630 struct cgraph_node *node = cgraph_node::get (fndecl);
3631 if (node == NULL || node->simd_clones == NULL)
3632 return false;
3634 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3635 return false;
3637 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3638 && ! vec_stmt)
3639 return false;
3641 if (gimple_call_lhs (stmt)
3642 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3643 return false;
3645 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3647 vectype = STMT_VINFO_VECTYPE (stmt_info);
3649 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3650 return false;
3652 /* FORNOW */
3653 if (slp_node)
3654 return false;
3656 /* Process function arguments. */
3657 nargs = gimple_call_num_args (stmt);
3659 /* Bail out if the function has zero arguments. */
3660 if (nargs == 0)
3661 return false;
3663 arginfo.reserve (nargs, true);
3665 for (i = 0; i < nargs; i++)
3667 simd_call_arg_info thisarginfo;
3668 affine_iv iv;
3670 thisarginfo.linear_step = 0;
3671 thisarginfo.align = 0;
3672 thisarginfo.op = NULL_TREE;
3673 thisarginfo.simd_lane_linear = false;
3675 op = gimple_call_arg (stmt, i);
3676 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3677 &thisarginfo.vectype)
3678 || thisarginfo.dt == vect_uninitialized_def)
3680 if (dump_enabled_p ())
3681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3682 "use not simple.\n");
3683 return false;
3686 if (thisarginfo.dt == vect_constant_def
3687 || thisarginfo.dt == vect_external_def)
3688 gcc_assert (thisarginfo.vectype == NULL_TREE);
3689 else
3690 gcc_assert (thisarginfo.vectype != NULL_TREE);
3692 /* For linear arguments, the analysis phase should have saved
3693 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3694 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3695 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3697 gcc_assert (vec_stmt);
3698 thisarginfo.linear_step
3699 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3700 thisarginfo.op
3701 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3702 thisarginfo.simd_lane_linear
3703 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3704 == boolean_true_node);
3705 /* If the loop has been peeled for alignment, we need to adjust it. */
3706 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3707 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3708 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3710 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3711 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3712 tree opt = TREE_TYPE (thisarginfo.op);
3713 bias = fold_convert (TREE_TYPE (step), bias);
3714 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3715 thisarginfo.op
3716 = fold_build2 (POINTER_TYPE_P (opt)
3717 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3718 thisarginfo.op, bias);
3721 else if (!vec_stmt
3722 && thisarginfo.dt != vect_constant_def
3723 && thisarginfo.dt != vect_external_def
3724 && loop_vinfo
3725 && TREE_CODE (op) == SSA_NAME
3726 && simple_iv (loop, loop_containing_stmt (stmt), op,
3727 &iv, false)
3728 && tree_fits_shwi_p (iv.step))
3730 thisarginfo.linear_step = tree_to_shwi (iv.step);
3731 thisarginfo.op = iv.base;
3733 else if ((thisarginfo.dt == vect_constant_def
3734 || thisarginfo.dt == vect_external_def)
3735 && POINTER_TYPE_P (TREE_TYPE (op)))
3736 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3737 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3738 linear too. */
3739 if (POINTER_TYPE_P (TREE_TYPE (op))
3740 && !thisarginfo.linear_step
3741 && !vec_stmt
3742 && thisarginfo.dt != vect_constant_def
3743 && thisarginfo.dt != vect_external_def
3744 && loop_vinfo
3745 && !slp_node
3746 && TREE_CODE (op) == SSA_NAME)
3747 vect_simd_lane_linear (op, loop, &thisarginfo);
3749 arginfo.quick_push (thisarginfo);
3752 unsigned HOST_WIDE_INT vf;
3753 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3755 if (dump_enabled_p ())
3756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3757 "not considering SIMD clones; not yet supported"
3758 " for variable-width vectors.\n");
3759 return false;
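/* Select which simd clone of NODE to use: skip clones whose simdlen
   exceeds the vectorization factor, whose argument count or argument
   kinds do not match the call, or that the target rejects, and keep
   the remaining candidate with the lowest badness score.  */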
3762 unsigned int badness = 0;
3763 struct cgraph_node *bestn = NULL;
3764 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3765 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3766 else
3767 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3768 n = n->simdclone->next_clone)
3770 unsigned int this_badness = 0;
3771 if (n->simdclone->simdlen > vf
3772 || n->simdclone->nargs != nargs)
3773 continue;
3774 if (n->simdclone->simdlen < vf)
3775 this_badness += (exact_log2 (vf)
3776 - exact_log2 (n->simdclone->simdlen)) * 1024;
3777 if (n->simdclone->inbranch)
3778 this_badness += 2048;
3779 int target_badness = targetm.simd_clone.usable (n);
3780 if (target_badness < 0)
3781 continue;
3782 this_badness += target_badness * 512;
3783 /* FORNOW: Have to add code to add the mask argument. */
3784 if (n->simdclone->inbranch)
3785 continue;
3786 for (i = 0; i < nargs; i++)
3788 switch (n->simdclone->args[i].arg_type)
3790 case SIMD_CLONE_ARG_TYPE_VECTOR:
3791 if (!useless_type_conversion_p
3792 (n->simdclone->args[i].orig_type,
3793 TREE_TYPE (gimple_call_arg (stmt, i))))
3794 i = -1;
3795 else if (arginfo[i].dt == vect_constant_def
3796 || arginfo[i].dt == vect_external_def
3797 || arginfo[i].linear_step)
3798 this_badness += 64;
3799 break;
3800 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3801 if (arginfo[i].dt != vect_constant_def
3802 && arginfo[i].dt != vect_external_def)
3803 i = -1;
3804 break;
3805 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3806 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3807 if (arginfo[i].dt == vect_constant_def
3808 || arginfo[i].dt == vect_external_def
3809 || (arginfo[i].linear_step
3810 != n->simdclone->args[i].linear_step))
3811 i = -1;
3812 break;
3813 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3814 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3815 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3816 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3817 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3818 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3819 /* FORNOW */
3820 i = -1;
3821 break;
3822 case SIMD_CLONE_ARG_TYPE_MASK:
3823 gcc_unreachable ();
3825 if (i == (size_t) -1)
3826 break;
3827 if (n->simdclone->args[i].alignment > arginfo[i].align)
3829 i = -1;
3830 break;
3832 if (arginfo[i].align)
3833 this_badness += (exact_log2 (arginfo[i].align)
3834 - exact_log2 (n->simdclone->args[i].alignment));
3836 if (i == (size_t) -1)
3837 continue;
3838 if (bestn == NULL || this_badness < badness)
3840 bestn = n;
3841 badness = this_badness;
3845 if (bestn == NULL)
3846 return false;
3848 for (i = 0; i < nargs; i++)
3849 if ((arginfo[i].dt == vect_constant_def
3850 || arginfo[i].dt == vect_external_def)
3851 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3853 arginfo[i].vectype
3854 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3855 i)));
3856 if (arginfo[i].vectype == NULL
3857 || (simd_clone_subparts (arginfo[i].vectype)
3858 > bestn->simdclone->simdlen))
3859 return false;
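/* Each call to the chosen clone covers SIMDLEN lanes, so VF / SIMDLEN
   copies of the call are needed per vectorized iteration.  */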
3862 fndecl = bestn->decl;
3863 nunits = bestn->simdclone->simdlen;
3864 ncopies = vf / nunits;
3866 /* If the function isn't const, only allow it in simd loops where the
3867 user has asserted that at least nunits consecutive iterations can be
3868 performed using SIMD instructions. */
3869 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3870 && gimple_vuse (stmt))
3871 return false;
3873 /* Sanity check: make sure that at least one copy of the vectorized stmt
3874 needs to be generated. */
3875 gcc_assert (ncopies >= 1);
3877 if (!vec_stmt) /* transformation not required. */
3879 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
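/* For linear arguments remember the base, the step and whether the
   argument is linear only within a simd lane, so the transform phase
   can reuse them instead of redoing the induction analysis.  */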
3880 for (i = 0; i < nargs; i++)
3881 if ((bestn->simdclone->args[i].arg_type
3882 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3883 || (bestn->simdclone->args[i].arg_type
3884 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3886 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3887 + 1);
3888 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3889 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3890 ? size_type_node : TREE_TYPE (arginfo[i].op);
3891 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3892 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3893 tree sll = arginfo[i].simd_lane_linear
3894 ? boolean_true_node : boolean_false_node;
3895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3897 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3898 if (dump_enabled_p ())
3899 dump_printf_loc (MSG_NOTE, vect_location,
3900 "=== vectorizable_simd_clone_call ===\n");
3901 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3902 return true;
3905 /* Transform. */
3907 if (dump_enabled_p ())
3908 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3910 /* Handle def. */
3911 scalar_dest = gimple_call_lhs (stmt);
3912 vec_dest = NULL_TREE;
3913 rtype = NULL_TREE;
3914 ratype = NULL_TREE;
3915 if (scalar_dest)
3917 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3918 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3919 if (TREE_CODE (rtype) == ARRAY_TYPE)
3921 ratype = rtype;
3922 rtype = TREE_TYPE (ratype);
3926 prev_stmt_info = NULL;
3927 for (j = 0; j < ncopies; ++j)
3929 /* Build argument list for the vectorized call. */
3930 if (j == 0)
3931 vargs.create (nargs);
3932 else
3933 vargs.truncate (0);
3935 for (i = 0; i < nargs; i++)
3937 unsigned int k, l, m, o;
3938 tree atype;
3939 op = gimple_call_arg (stmt, i);
3940 switch (bestn->simdclone->args[i].arg_type)
3942 case SIMD_CLONE_ARG_TYPE_VECTOR:
3943 atype = bestn->simdclone->args[i].vector_type;
3944 o = nunits / simd_clone_subparts (atype);
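/* The clone may take this argument in vector pieces that are narrower
   or wider than the loop's vector type: narrower pieces are extracted
   with BIT_FIELD_REFs, wider ones are assembled from several loop
   vectors through a CONSTRUCTOR (or passed through unchanged when the
   widths match).  */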
3945 for (m = j * o; m < (j + 1) * o; m++)
3947 if (simd_clone_subparts (atype)
3948 < simd_clone_subparts (arginfo[i].vectype))
3950 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3951 k = (simd_clone_subparts (arginfo[i].vectype)
3952 / simd_clone_subparts (atype));
3953 gcc_assert ((k & (k - 1)) == 0);
3954 if (m == 0)
3955 vec_oprnd0
3956 = vect_get_vec_def_for_operand (op, stmt);
3957 else
3959 vec_oprnd0 = arginfo[i].op;
3960 if ((m & (k - 1)) == 0)
3961 vec_oprnd0
3962 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3963 vec_oprnd0);
3965 arginfo[i].op = vec_oprnd0;
3966 vec_oprnd0
3967 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3968 bitsize_int (prec),
3969 bitsize_int ((m & (k - 1)) * prec));
3970 new_stmt
3971 = gimple_build_assign (make_ssa_name (atype),
3972 vec_oprnd0);
3973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3974 vargs.safe_push (gimple_assign_lhs (new_stmt));
3976 else
3978 k = (simd_clone_subparts (atype)
3979 / simd_clone_subparts (arginfo[i].vectype));
3980 gcc_assert ((k & (k - 1)) == 0);
3981 vec<constructor_elt, va_gc> *ctor_elts;
3982 if (k != 1)
3983 vec_alloc (ctor_elts, k);
3984 else
3985 ctor_elts = NULL;
3986 for (l = 0; l < k; l++)
3988 if (m == 0 && l == 0)
3989 vec_oprnd0
3990 = vect_get_vec_def_for_operand (op, stmt);
3991 else
3992 vec_oprnd0
3993 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3994 arginfo[i].op);
3995 arginfo[i].op = vec_oprnd0;
3996 if (k == 1)
3997 break;
3998 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3999 vec_oprnd0);
4001 if (k == 1)
4002 vargs.safe_push (vec_oprnd0);
4003 else
4005 vec_oprnd0 = build_constructor (atype, ctor_elts);
4006 new_stmt
4007 = gimple_build_assign (make_ssa_name (atype),
4008 vec_oprnd0);
4009 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4010 vargs.safe_push (gimple_assign_lhs (new_stmt));
4014 break;
4015 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4016 vargs.safe_push (op);
4017 break;
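/* A linear argument is passed as the value for the first lane of the
   current chunk: on the first copy materialize the base in the loop
   preheader and, unless the value is linear only within a simd lane,
   create a header phi that advances it by STEP * NCOPIES * NUNITS per
   iteration; later copies simply add STEP * J * NUNITS to it.  */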
4018 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4019 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4020 if (j == 0)
4022 gimple_seq stmts;
4023 arginfo[i].op
4024 = force_gimple_operand (arginfo[i].op, &stmts, true,
4025 NULL_TREE);
4026 if (stmts != NULL)
4028 basic_block new_bb;
4029 edge pe = loop_preheader_edge (loop);
4030 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4031 gcc_assert (!new_bb);
4033 if (arginfo[i].simd_lane_linear)
4035 vargs.safe_push (arginfo[i].op);
4036 break;
4038 tree phi_res = copy_ssa_name (op);
4039 gphi *new_phi = create_phi_node (phi_res, loop->header);
4040 set_vinfo_for_stmt (new_phi,
4041 new_stmt_vec_info (new_phi, loop_vinfo));
4042 add_phi_arg (new_phi, arginfo[i].op,
4043 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4044 enum tree_code code
4045 = POINTER_TYPE_P (TREE_TYPE (op))
4046 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4047 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4048 ? sizetype : TREE_TYPE (op);
4049 widest_int cst
4050 = wi::mul (bestn->simdclone->args[i].linear_step,
4051 ncopies * nunits);
4052 tree tcst = wide_int_to_tree (type, cst);
4053 tree phi_arg = copy_ssa_name (op);
4054 new_stmt
4055 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4056 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4057 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4058 set_vinfo_for_stmt (new_stmt,
4059 new_stmt_vec_info (new_stmt, loop_vinfo));
4060 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4061 UNKNOWN_LOCATION);
4062 arginfo[i].op = phi_res;
4063 vargs.safe_push (phi_res);
4065 else
4067 enum tree_code code
4068 = POINTER_TYPE_P (TREE_TYPE (op))
4069 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4070 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4071 ? sizetype : TREE_TYPE (op);
4072 widest_int cst
4073 = wi::mul (bestn->simdclone->args[i].linear_step,
4074 j * nunits);
4075 tree tcst = wide_int_to_tree (type, cst);
4076 new_temp = make_ssa_name (TREE_TYPE (op));
4077 new_stmt = gimple_build_assign (new_temp, code,
4078 arginfo[i].op, tcst);
4079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4080 vargs.safe_push (new_temp);
4082 break;
4083 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4084 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4085 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4086 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4087 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4088 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4089 default:
4090 gcc_unreachable ();
4094 new_stmt = gimple_build_call_vec (fndecl, vargs);
4095 if (vec_dest)
4097 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4098 if (ratype)
4099 new_temp = create_tmp_var (ratype);
4100 else if (simd_clone_subparts (vectype)
4101 == simd_clone_subparts (rtype))
4102 new_temp = make_ssa_name (vec_dest, new_stmt);
4103 else
4104 new_temp = make_ssa_name (rtype, new_stmt);
4105 gimple_call_set_lhs (new_stmt, new_temp);
4107 vect_finish_stmt_generation (stmt, new_stmt, gsi);
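/* Bring the clone's return value back to the loop's vector type: when
   the clone covers more lanes than VECTYPE, split its vector or array
   result into pieces of VECTYPE; when it covers fewer lanes, collect
   the results of several calls in a CONSTRUCTOR; when it returns an
   array of matching width, load the vector back from it; otherwise the
   returned vector is used directly.  */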
4109 if (vec_dest)
4111 if (simd_clone_subparts (vectype) < nunits)
4113 unsigned int k, l;
4114 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4115 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4116 k = nunits / simd_clone_subparts (vectype);
4117 gcc_assert ((k & (k - 1)) == 0);
4118 for (l = 0; l < k; l++)
4120 tree t;
4121 if (ratype)
4123 t = build_fold_addr_expr (new_temp);
4124 t = build2 (MEM_REF, vectype, t,
4125 build_int_cst (TREE_TYPE (t), l * bytes));
4127 else
4128 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4129 bitsize_int (prec), bitsize_int (l * prec));
4130 new_stmt
4131 = gimple_build_assign (make_ssa_name (vectype), t);
4132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4133 if (j == 0 && l == 0)
4134 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4135 else
4136 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4138 prev_stmt_info = vinfo_for_stmt (new_stmt);
4141 if (ratype)
4142 vect_clobber_variable (stmt, gsi, new_temp);
4143 continue;
4145 else if (simd_clone_subparts (vectype) > nunits)
4147 unsigned int k = (simd_clone_subparts (vectype)
4148 / simd_clone_subparts (rtype));
4149 gcc_assert ((k & (k - 1)) == 0);
4150 if ((j & (k - 1)) == 0)
4151 vec_alloc (ret_ctor_elts, k);
4152 if (ratype)
4154 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4155 for (m = 0; m < o; m++)
4157 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4158 size_int (m), NULL_TREE, NULL_TREE);
4159 new_stmt
4160 = gimple_build_assign (make_ssa_name (rtype), tem);
4161 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4162 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4163 gimple_assign_lhs (new_stmt));
4165 vect_clobber_variable (stmt, gsi, new_temp);
4167 else
4168 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4169 if ((j & (k - 1)) != k - 1)
4170 continue;
4171 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4172 new_stmt
4173 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4176 if ((unsigned) j == k - 1)
4177 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4178 else
4179 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4181 prev_stmt_info = vinfo_for_stmt (new_stmt);
4182 continue;
4184 else if (ratype)
4186 tree t = build_fold_addr_expr (new_temp);
4187 t = build2 (MEM_REF, vectype, t,
4188 build_int_cst (TREE_TYPE (t), 0));
4189 new_stmt
4190 = gimple_build_assign (make_ssa_name (vec_dest), t);
4191 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4192 vect_clobber_variable (stmt, gsi, new_temp);
4196 if (j == 0)
4197 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4198 else
4199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4201 prev_stmt_info = vinfo_for_stmt (new_stmt);
4204 vargs.release ();
4206 /* The call in STMT might prevent it from being removed in dce.
4207 However, we cannot remove it here, due to the way the ssa name
4208 it defines is mapped to the new definition. So just replace the
4209 rhs of the statement with something harmless. */
4211 if (slp_node)
4212 return true;
4214 if (scalar_dest)
4216 type = TREE_TYPE (scalar_dest);
4217 if (is_pattern_stmt_p (stmt_info))
4218 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4219 else
4220 lhs = gimple_call_lhs (stmt);
4221 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4223 else
4224 new_stmt = gimple_build_nop ();
4225 set_vinfo_for_stmt (new_stmt, stmt_info);
4226 set_vinfo_for_stmt (stmt, NULL);
4227 STMT_VINFO_STMT (stmt_info) = new_stmt;
4228 gsi_replace (gsi, new_stmt, true);
4229 unlink_stmt_vdef (stmt);
4231 return true;
4235 /* Function vect_gen_widened_results_half
4237 Create a vector stmt whose code and number of operands are given by
4238 CODE and OP_TYPE, whose result variable is VEC_DEST, and whose arguments
4239 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4240 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4241 needs to be created (DECL is a function-decl of a target-builtin).
4242 STMT is the original scalar stmt that we are vectorizing. */
4244 static gimple *
4245 vect_gen_widened_results_half (enum tree_code code,
4246 tree decl,
4247 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4248 tree vec_dest, gimple_stmt_iterator *gsi,
4249 gimple *stmt)
4251 gimple *new_stmt;
4252 tree new_temp;
4254 /* Generate half of the widened result: */
4255 if (code == CALL_EXPR)
4257 /* Target specific support */
4258 if (op_type == binary_op)
4259 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4260 else
4261 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4262 new_temp = make_ssa_name (vec_dest, new_stmt);
4263 gimple_call_set_lhs (new_stmt, new_temp);
4265 else
4267 /* Generic support */
4268 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4269 if (op_type != binary_op)
4270 vec_oprnd1 = NULL;
4271 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4272 new_temp = make_ssa_name (vec_dest, new_stmt);
4273 gimple_assign_set_lhs (new_stmt, new_temp);
4275 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4277 return new_stmt;
4281 /* Get vectorized definitions for loop-based vectorization. For the first
4282 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4283 the scalar operand), and for the rest we get a copy with
4284 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4285 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4286 The vectors are collected into VEC_OPRNDS. */
4288 static void
4289 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4290 vec<tree> *vec_oprnds, int multi_step_cvt)
4292 tree vec_oprnd;
4294 /* Get first vector operand. */
4295 /* All the vector operands except the very first one (that is scalar oprnd)
4296 are stmt copies. */
4297 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4298 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4299 else
4300 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4302 vec_oprnds->quick_push (vec_oprnd);
4304 /* Get second vector operand. */
4305 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4306 vec_oprnds->quick_push (vec_oprnd);
4308 *oprnd = vec_oprnd;
4310 /* For conversion in multiple steps, continue to get operands
4311 recursively. */
4312 if (multi_step_cvt)
4313 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4317 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4318 For multi-step conversions store the resulting vectors and call the function
4319 recursively. */
4321 static void
4322 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4323 int multi_step_cvt, gimple *stmt,
4324 vec<tree> vec_dsts,
4325 gimple_stmt_iterator *gsi,
4326 slp_tree slp_node, enum tree_code code,
4327 stmt_vec_info *prev_stmt_info)
4329 unsigned int i;
4330 tree vop0, vop1, new_tmp, vec_dest;
4331 gimple *new_stmt;
4332 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4334 vec_dest = vec_dsts.pop ();
4336 for (i = 0; i < vec_oprnds->length (); i += 2)
4338 /* Create demotion operation. */
4339 vop0 = (*vec_oprnds)[i];
4340 vop1 = (*vec_oprnds)[i + 1];
4341 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4342 new_tmp = make_ssa_name (vec_dest, new_stmt);
4343 gimple_assign_set_lhs (new_stmt, new_tmp);
4344 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4346 if (multi_step_cvt)
4347 /* Store the resulting vector for next recursive call. */
4348 (*vec_oprnds)[i/2] = new_tmp;
4349 else
4351 /* This is the last step of the conversion sequence. Store the
4352 vectors in SLP_NODE or in the vector info of the scalar statement
4353 (or in the STMT_VINFO_RELATED_STMT chain). */
4354 if (slp_node)
4355 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4356 else
4358 if (!*prev_stmt_info)
4359 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4360 else
4361 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4363 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4368 /* For multi-step demotion operations we first generate demotion operations
4369 from the source type to the intermediate types, and then combine the
4370 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4371 type. */
4372 if (multi_step_cvt)
4374 /* At each level of recursion we have half of the operands we had at the
4375 previous level. */
4376 vec_oprnds->truncate ((i+1)/2);
4377 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4378 stmt, vec_dsts, gsi, slp_node,
4379 VEC_PACK_TRUNC_EXPR,
4380 prev_stmt_info);
4383 vec_dsts.quick_push (vec_dest);
4387 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4388 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4389 the resulting vectors and call the function recursively. */
4391 static void
4392 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4393 vec<tree> *vec_oprnds1,
4394 gimple *stmt, tree vec_dest,
4395 gimple_stmt_iterator *gsi,
4396 enum tree_code code1,
4397 enum tree_code code2, tree decl1,
4398 tree decl2, int op_type)
4400 int i;
4401 tree vop0, vop1, new_tmp1, new_tmp2;
4402 gimple *new_stmt1, *new_stmt2;
4403 vec<tree> vec_tmp = vNULL;
4405 vec_tmp.create (vec_oprnds0->length () * 2);
4406 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4408 if (op_type == binary_op)
4409 vop1 = (*vec_oprnds1)[i];
4410 else
4411 vop1 = NULL_TREE;
4413 /* Generate the two halves of the promotion operation. */
4414 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4415 op_type, vec_dest, gsi, stmt);
4416 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4417 op_type, vec_dest, gsi, stmt);
4418 if (is_gimple_call (new_stmt1))
4420 new_tmp1 = gimple_call_lhs (new_stmt1);
4421 new_tmp2 = gimple_call_lhs (new_stmt2);
4423 else
4425 new_tmp1 = gimple_assign_lhs (new_stmt1);
4426 new_tmp2 = gimple_assign_lhs (new_stmt2);
4429 /* Store the results for the next step. */
4430 vec_tmp.quick_push (new_tmp1);
4431 vec_tmp.quick_push (new_tmp2);
4434 vec_oprnds0->release ();
4435 *vec_oprnds0 = vec_tmp;
4439 /* Check if STMT performs a conversion operation, that can be vectorized.
4440 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4441 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4442 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4444 static bool
4445 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4446 gimple **vec_stmt, slp_tree slp_node)
4448 tree vec_dest;
4449 tree scalar_dest;
4450 tree op0, op1 = NULL_TREE;
4451 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4452 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4453 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4454 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4455 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4456 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4457 tree new_temp;
4458 gimple *def_stmt;
4459 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4460 int ndts = 2;
4461 gimple *new_stmt = NULL;
4462 stmt_vec_info prev_stmt_info;
4463 poly_uint64 nunits_in;
4464 poly_uint64 nunits_out;
4465 tree vectype_out, vectype_in;
4466 int ncopies, i, j;
4467 tree lhs_type, rhs_type;
4468 enum { NARROW, NONE, WIDEN } modifier;
4469 vec<tree> vec_oprnds0 = vNULL;
4470 vec<tree> vec_oprnds1 = vNULL;
4471 tree vop0;
4472 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4473 vec_info *vinfo = stmt_info->vinfo;
4474 int multi_step_cvt = 0;
4475 vec<tree> interm_types = vNULL;
4476 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4477 int op_type;
4478 unsigned short fltsz;
4480 /* Is STMT a vectorizable conversion? */
4482 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4483 return false;
4485 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4486 && ! vec_stmt)
4487 return false;
4489 if (!is_gimple_assign (stmt))
4490 return false;
4492 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4493 return false;
4495 code = gimple_assign_rhs_code (stmt);
4496 if (!CONVERT_EXPR_CODE_P (code)
4497 && code != FIX_TRUNC_EXPR
4498 && code != FLOAT_EXPR
4499 && code != WIDEN_MULT_EXPR
4500 && code != WIDEN_LSHIFT_EXPR)
4501 return false;
4503 op_type = TREE_CODE_LENGTH (code);
4505 /* Check types of lhs and rhs. */
4506 scalar_dest = gimple_assign_lhs (stmt);
4507 lhs_type = TREE_TYPE (scalar_dest);
4508 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4510 op0 = gimple_assign_rhs1 (stmt);
4511 rhs_type = TREE_TYPE (op0);
4513 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4514 && !((INTEGRAL_TYPE_P (lhs_type)
4515 && INTEGRAL_TYPE_P (rhs_type))
4516 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4517 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4518 return false;
4520 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4521 && ((INTEGRAL_TYPE_P (lhs_type)
4522 && !type_has_mode_precision_p (lhs_type))
4523 || (INTEGRAL_TYPE_P (rhs_type)
4524 && !type_has_mode_precision_p (rhs_type))))
4526 if (dump_enabled_p ())
4527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4528 "type conversion to/from bit-precision unsupported."
4529 "\n");
4530 return false;
4533 /* Check the operands of the operation. */
4534 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4536 if (dump_enabled_p ())
4537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4538 "use not simple.\n");
4539 return false;
4541 if (op_type == binary_op)
4543 bool ok;
4545 op1 = gimple_assign_rhs2 (stmt);
4546 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4547 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4548 OP1. */
4549 if (CONSTANT_CLASS_P (op0))
4550 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4551 else
4552 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4554 if (!ok)
4556 if (dump_enabled_p ())
4557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4558 "use not simple.\n");
4559 return false;
4563 /* If op0 is an external or constant def, use a vector type of
4564 the same size as the output vector type. */
4565 if (!vectype_in)
4566 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4567 if (vec_stmt)
4568 gcc_assert (vectype_in);
4569 if (!vectype_in)
4571 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4574 "no vectype for scalar type ");
4575 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4576 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4579 return false;
4582 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4583 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4585 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4588 "can't convert between boolean and non "
4589 "boolean vectors");
4590 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4591 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4594 return false;
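/* Classify the conversion by comparing lane counts: equal counts need a
   straight conversion (NONE), more output lanes than input lanes mean
   the elements are narrowed (NARROW), fewer mean they are widened
   (WIDEN).  */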
4597 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4598 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4599 if (known_eq (nunits_out, nunits_in))
4600 modifier = NONE;
4601 else if (multiple_p (nunits_out, nunits_in))
4602 modifier = NARROW;
4603 else
4605 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4606 modifier = WIDEN;
4609 /* Multiple types in SLP are handled by creating the appropriate number of
4610 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4611 case of SLP. */
4612 if (slp_node)
4613 ncopies = 1;
4614 else if (modifier == NARROW)
4615 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4616 else
4617 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4619 /* Sanity check: make sure that at least one copy of the vectorized stmt
4620 needs to be generated. */
4621 gcc_assert (ncopies >= 1);
4623 bool found_mode = false;
4624 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4625 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4626 opt_scalar_mode rhs_mode_iter;
4628 /* Supportable by target? */
4629 switch (modifier)
4631 case NONE:
4632 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4633 return false;
4634 if (supportable_convert_operation (code, vectype_out, vectype_in,
4635 &decl1, &code1))
4636 break;
4637 /* FALLTHRU */
4638 unsupported:
4639 if (dump_enabled_p ())
4640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4641 "conversion not supported by target.\n");
4642 return false;
4644 case WIDEN:
4645 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4646 &code1, &code2, &multi_step_cvt,
4647 &interm_types))
4649 /* Binary widening operation can only be supported directly by the
4650 architecture. */
4651 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4652 break;
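/* The target cannot widen this conversion directly. For an
   integer-to-float conversion, try to find a wider integer mode to use
   as an intermediate step: first widen the integer input to CVT_TYPE,
   then convert that to the float result.  */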
4655 if (code != FLOAT_EXPR
4656 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4657 goto unsupported;
4659 fltsz = GET_MODE_SIZE (lhs_mode);
4660 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4662 rhs_mode = rhs_mode_iter.require ();
4663 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4664 break;
4666 cvt_type
4667 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4668 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4669 if (cvt_type == NULL_TREE)
4670 goto unsupported;
4672 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4674 if (!supportable_convert_operation (code, vectype_out,
4675 cvt_type, &decl1, &codecvt1))
4676 goto unsupported;
4678 else if (!supportable_widening_operation (code, stmt, vectype_out,
4679 cvt_type, &codecvt1,
4680 &codecvt2, &multi_step_cvt,
4681 &interm_types))
4682 continue;
4683 else
4684 gcc_assert (multi_step_cvt == 0);
4686 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4687 vectype_in, &code1, &code2,
4688 &multi_step_cvt, &interm_types))
4690 found_mode = true;
4691 break;
4695 if (!found_mode)
4696 goto unsupported;
4698 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4699 codecvt2 = ERROR_MARK;
4700 else
4702 multi_step_cvt++;
4703 interm_types.safe_push (cvt_type);
4704 cvt_type = NULL_TREE;
4706 break;
4708 case NARROW:
4709 gcc_assert (op_type == unary_op);
4710 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4711 &code1, &multi_step_cvt,
4712 &interm_types))
4713 break;
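/* The target cannot narrow this conversion directly. For a
   float-to-integer conversion, first convert to an integer type of the
   same width as the input (CVT_TYPE) and then narrow that to the
   output type.  */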
4715 if (code != FIX_TRUNC_EXPR
4716 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4717 goto unsupported;
4719 cvt_type
4720 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4721 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4722 if (cvt_type == NULL_TREE)
4723 goto unsupported;
4724 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4725 &decl1, &codecvt1))
4726 goto unsupported;
4727 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4728 &code1, &multi_step_cvt,
4729 &interm_types))
4730 break;
4731 goto unsupported;
4733 default:
4734 gcc_unreachable ();
4737 if (!vec_stmt) /* transformation not required. */
4739 if (dump_enabled_p ())
4740 dump_printf_loc (MSG_NOTE, vect_location,
4741 "=== vectorizable_conversion ===\n");
4742 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4744 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4745 if (!slp_node)
4746 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4748 else if (modifier == NARROW)
4750 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4751 if (!slp_node)
4752 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4754 else
4756 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4757 if (!slp_node)
4758 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4760 interm_types.release ();
4761 return true;
4764 /* Transform. */
4765 if (dump_enabled_p ())
4766 dump_printf_loc (MSG_NOTE, vect_location,
4767 "transform conversion. ncopies = %d.\n", ncopies);
4769 if (op_type == binary_op)
4771 if (CONSTANT_CLASS_P (op0))
4772 op0 = fold_convert (TREE_TYPE (op1), op0);
4773 else if (CONSTANT_CLASS_P (op1))
4774 op1 = fold_convert (TREE_TYPE (op0), op1);
4777 /* In case of multi-step conversion, we first generate conversion operations
4778 to the intermediate types, and then from those types to the final one.
4779 We create vector destinations for the intermediate types (TYPES) received
4780 from supportable_*_operation, and store them in the correct order
4781 for future use in vect_create_vectorized_*_stmts (). */
4782 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4783 vec_dest = vect_create_destination_var (scalar_dest,
4784 (cvt_type && modifier == WIDEN)
4785 ? cvt_type : vectype_out);
4786 vec_dsts.quick_push (vec_dest);
4788 if (multi_step_cvt)
4790 for (i = interm_types.length () - 1;
4791 interm_types.iterate (i, &intermediate_type); i--)
4793 vec_dest = vect_create_destination_var (scalar_dest,
4794 intermediate_type);
4795 vec_dsts.quick_push (vec_dest);
4799 if (cvt_type)
4800 vec_dest = vect_create_destination_var (scalar_dest,
4801 modifier == WIDEN
4802 ? vectype_out : cvt_type);
4804 if (!slp_node)
4806 if (modifier == WIDEN)
4808 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4809 if (op_type == binary_op)
4810 vec_oprnds1.create (1);
4812 else if (modifier == NARROW)
4813 vec_oprnds0.create (
4814 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4816 else if (code == WIDEN_LSHIFT_EXPR)
4817 vec_oprnds1.create (slp_node->vec_stmts_size);
4819 last_oprnd = op0;
4820 prev_stmt_info = NULL;
4821 switch (modifier)
4823 case NONE:
4824 for (j = 0; j < ncopies; j++)
4826 if (j == 0)
4827 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4828 else
4829 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4831 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4833 /* Arguments are ready. Create the new vector stmt. */
4834 if (code1 == CALL_EXPR)
4836 new_stmt = gimple_build_call (decl1, 1, vop0);
4837 new_temp = make_ssa_name (vec_dest, new_stmt);
4838 gimple_call_set_lhs (new_stmt, new_temp);
4840 else
4842 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4843 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4844 new_temp = make_ssa_name (vec_dest, new_stmt);
4845 gimple_assign_set_lhs (new_stmt, new_temp);
4848 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4849 if (slp_node)
4850 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4851 else
4853 if (!prev_stmt_info)
4854 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4855 else
4856 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4857 prev_stmt_info = vinfo_for_stmt (new_stmt);
4861 break;
4863 case WIDEN:
4864 /* In case the vectorization factor (VF) is bigger than the number
4865 of elements that we can fit in a vectype (nunits), we have to
4866 generate more than one vector stmt - i.e. - we need to "unroll"
4867 the vector stmt by a factor VF/nunits. */
4868 for (j = 0; j < ncopies; j++)
4870 /* Handle uses. */
4871 if (j == 0)
4873 if (slp_node)
4875 if (code == WIDEN_LSHIFT_EXPR)
4877 unsigned int k;
4879 vec_oprnd1 = op1;
4880 /* Store vec_oprnd1 for every vector stmt to be created
4881 for SLP_NODE. We check during the analysis that all
4882 the shift arguments are the same. */
4883 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4884 vec_oprnds1.quick_push (vec_oprnd1);
4886 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4887 slp_node);
4889 else
4890 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4891 &vec_oprnds1, slp_node);
4893 else
4895 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4896 vec_oprnds0.quick_push (vec_oprnd0);
4897 if (op_type == binary_op)
4899 if (code == WIDEN_LSHIFT_EXPR)
4900 vec_oprnd1 = op1;
4901 else
4902 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4903 vec_oprnds1.quick_push (vec_oprnd1);
4907 else
4909 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4910 vec_oprnds0.truncate (0);
4911 vec_oprnds0.quick_push (vec_oprnd0);
4912 if (op_type == binary_op)
4914 if (code == WIDEN_LSHIFT_EXPR)
4915 vec_oprnd1 = op1;
4916 else
4917 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4918 vec_oprnd1);
4919 vec_oprnds1.truncate (0);
4920 vec_oprnds1.quick_push (vec_oprnd1);
4924 /* Arguments are ready. Create the new vector stmts. */
4925 for (i = multi_step_cvt; i >= 0; i--)
4927 tree this_dest = vec_dsts[i];
4928 enum tree_code c1 = code1, c2 = code2;
4929 if (i == 0 && codecvt2 != ERROR_MARK)
4931 c1 = codecvt1;
4932 c2 = codecvt2;
4934 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4935 &vec_oprnds1,
4936 stmt, this_dest, gsi,
4937 c1, c2, decl1, decl2,
4938 op_type);
4941 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4943 if (cvt_type)
4945 if (codecvt1 == CALL_EXPR)
4947 new_stmt = gimple_build_call (decl1, 1, vop0);
4948 new_temp = make_ssa_name (vec_dest, new_stmt);
4949 gimple_call_set_lhs (new_stmt, new_temp);
4951 else
4953 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4954 new_temp = make_ssa_name (vec_dest);
4955 new_stmt = gimple_build_assign (new_temp, codecvt1,
4956 vop0);
4959 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4961 else
4962 new_stmt = SSA_NAME_DEF_STMT (vop0);
4964 if (slp_node)
4965 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4966 else
4968 if (!prev_stmt_info)
4969 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4970 else
4971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4972 prev_stmt_info = vinfo_for_stmt (new_stmt);
4977 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4978 break;
4980 case NARROW:
4981 /* In case the vectorization factor (VF) is bigger than the number
4982 of elements that we can fit in a vectype (nunits), we have to
4983 generate more than one vector stmt - i.e. - we need to "unroll"
4984 the vector stmt by a factor VF/nunits. */
4985 for (j = 0; j < ncopies; j++)
4987 /* Handle uses. */
4988 if (slp_node)
4989 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4990 slp_node);
4991 else
4993 vec_oprnds0.truncate (0);
4994 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4995 vect_pow2 (multi_step_cvt) - 1);
4998 /* Arguments are ready. Create the new vector stmts. */
4999 if (cvt_type)
5000 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5002 if (codecvt1 == CALL_EXPR)
5004 new_stmt = gimple_build_call (decl1, 1, vop0);
5005 new_temp = make_ssa_name (vec_dest, new_stmt);
5006 gimple_call_set_lhs (new_stmt, new_temp);
5008 else
5010 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5011 new_temp = make_ssa_name (vec_dest);
5012 new_stmt = gimple_build_assign (new_temp, codecvt1,
5013 vop0);
5016 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5017 vec_oprnds0[i] = new_temp;
5020 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5021 stmt, vec_dsts, gsi,
5022 slp_node, code1,
5023 &prev_stmt_info);
5026 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5027 break;
5030 vec_oprnds0.release ();
5031 vec_oprnds1.release ();
5032 interm_types.release ();
5034 return true;
5038 /* Function vectorizable_assignment.
5040 Check if STMT performs an assignment (copy) that can be vectorized.
5041 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5042 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5043 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5045 static bool
5046 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5047 gimple **vec_stmt, slp_tree slp_node)
5049 tree vec_dest;
5050 tree scalar_dest;
5051 tree op;
5052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5053 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5054 tree new_temp;
5055 gimple *def_stmt;
5056 enum vect_def_type dt[1] = {vect_unknown_def_type};
5057 int ndts = 1;
5058 int ncopies;
5059 int i, j;
5060 vec<tree> vec_oprnds = vNULL;
5061 tree vop;
5062 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5063 vec_info *vinfo = stmt_info->vinfo;
5064 gimple *new_stmt = NULL;
5065 stmt_vec_info prev_stmt_info = NULL;
5066 enum tree_code code;
5067 tree vectype_in;
5069 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5070 return false;
5072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5073 && ! vec_stmt)
5074 return false;
5076 /* Is vectorizable assignment? */
5077 if (!is_gimple_assign (stmt))
5078 return false;
5080 scalar_dest = gimple_assign_lhs (stmt);
5081 if (TREE_CODE (scalar_dest) != SSA_NAME)
5082 return false;
5084 code = gimple_assign_rhs_code (stmt);
5085 if (gimple_assign_single_p (stmt)
5086 || code == PAREN_EXPR
5087 || CONVERT_EXPR_CODE_P (code))
5088 op = gimple_assign_rhs1 (stmt);
5089 else
5090 return false;
5092 if (code == VIEW_CONVERT_EXPR)
5093 op = TREE_OPERAND (op, 0);
5095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5096 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5098 /* Multiple types in SLP are handled by creating the appropriate number of
5099 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5100 case of SLP. */
5101 if (slp_node)
5102 ncopies = 1;
5103 else
5104 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5106 gcc_assert (ncopies >= 1);
5108 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5110 if (dump_enabled_p ())
5111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5112 "use not simple.\n");
5113 return false;
5116 /* We can handle NOP_EXPR conversions that do not change the number
5117 of elements or the vector size. */
5118 if ((CONVERT_EXPR_CODE_P (code)
5119 || code == VIEW_CONVERT_EXPR)
5120 && (!vectype_in
5121 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5122 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5123 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5124 return false;
5126 /* We do not handle bit-precision changes. */
5127 if ((CONVERT_EXPR_CODE_P (code)
5128 || code == VIEW_CONVERT_EXPR)
5129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5130 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5131 || !type_has_mode_precision_p (TREE_TYPE (op)))
5132 /* But a conversion that does not change the bit-pattern is ok. */
5133 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5134 > TYPE_PRECISION (TREE_TYPE (op)))
5135 && TYPE_UNSIGNED (TREE_TYPE (op)))
5136 /* Conversion between boolean types of different sizes is
5137 a simple assignment in case their vectypes are same
5138 boolean vectors. */
5139 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5140 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5142 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5144 "type conversion to/from bit-precision "
5145 "unsupported.\n");
5146 return false;
5149 if (!vec_stmt) /* transformation not required. */
5151 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5152 if (dump_enabled_p ())
5153 dump_printf_loc (MSG_NOTE, vect_location,
5154 "=== vectorizable_assignment ===\n");
5155 if (!slp_node)
5156 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5157 return true;
5160 /* Transform. */
5161 if (dump_enabled_p ())
5162 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5164 /* Handle def. */
5165 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5167 /* Handle use. */
5168 for (j = 0; j < ncopies; j++)
5170 /* Handle uses. */
5171 if (j == 0)
5172 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5173 else
5174 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5176 /* Arguments are ready. Create the new vector stmt. */
5177 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5179 if (CONVERT_EXPR_CODE_P (code)
5180 || code == VIEW_CONVERT_EXPR)
5181 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5182 new_stmt = gimple_build_assign (vec_dest, vop);
5183 new_temp = make_ssa_name (vec_dest, new_stmt);
5184 gimple_assign_set_lhs (new_stmt, new_temp);
5185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5186 if (slp_node)
5187 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5190 if (slp_node)
5191 continue;
5193 if (j == 0)
5194 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5195 else
5196 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5198 prev_stmt_info = vinfo_for_stmt (new_stmt);
5201 vec_oprnds.release ();
5202 return true;
5206 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5207 either as a shift by a scalar or by a vector. */
5209 bool
5210 vect_supportable_shift (enum tree_code code, tree scalar_type)
5213 machine_mode vec_mode;
5214 optab optab;
5215 int icode;
5216 tree vectype;
5218 vectype = get_vectype_for_scalar_type (scalar_type);
5219 if (!vectype)
5220 return false;
5222 optab = optab_for_tree_code (code, vectype, optab_scalar);
5223 if (!optab
5224 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5226 optab = optab_for_tree_code (code, vectype, optab_vector);
5227 if (!optab
5228 || (optab_handler (optab, TYPE_MODE (vectype))
5229 == CODE_FOR_nothing))
5230 return false;
5233 vec_mode = TYPE_MODE (vectype);
5234 icode = (int) optab_handler (optab, vec_mode);
5235 if (icode == CODE_FOR_nothing)
5236 return false;
5238 return true;
5242 /* Function vectorizable_shift.
5244 Check if STMT performs a shift operation that can be vectorized.
5245 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5246 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5247 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5249 static bool
5250 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5251 gimple **vec_stmt, slp_tree slp_node)
5253 tree vec_dest;
5254 tree scalar_dest;
5255 tree op0, op1 = NULL;
5256 tree vec_oprnd1 = NULL_TREE;
5257 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5258 tree vectype;
5259 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5260 enum tree_code code;
5261 machine_mode vec_mode;
5262 tree new_temp;
5263 optab optab;
5264 int icode;
5265 machine_mode optab_op2_mode;
5266 gimple *def_stmt;
5267 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5268 int ndts = 2;
5269 gimple *new_stmt = NULL;
5270 stmt_vec_info prev_stmt_info;
5271 poly_uint64 nunits_in;
5272 poly_uint64 nunits_out;
5273 tree vectype_out;
5274 tree op1_vectype;
5275 int ncopies;
5276 int j, i;
5277 vec<tree> vec_oprnds0 = vNULL;
5278 vec<tree> vec_oprnds1 = vNULL;
5279 tree vop0, vop1;
5280 unsigned int k;
5281 bool scalar_shift_arg = true;
5282 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5283 vec_info *vinfo = stmt_info->vinfo;
5285 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5286 return false;
5288 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5289 && ! vec_stmt)
5290 return false;
5292 /* Is STMT a vectorizable binary/unary operation? */
5293 if (!is_gimple_assign (stmt))
5294 return false;
5296 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5297 return false;
5299 code = gimple_assign_rhs_code (stmt);
5301 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5302 || code == RROTATE_EXPR))
5303 return false;
5305 scalar_dest = gimple_assign_lhs (stmt);
5306 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5307 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5311 "bit-precision shifts not supported.\n");
5312 return false;
5315 op0 = gimple_assign_rhs1 (stmt);
5316 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5320 "use not simple.\n");
5321 return false;
5323 /* If op0 is an external or constant def, use a vector type with
5324 the same size as the output vector type. */
5325 if (!vectype)
5326 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5327 if (vec_stmt)
5328 gcc_assert (vectype);
5329 if (!vectype)
5331 if (dump_enabled_p ())
5332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5333 "no vectype for scalar type\n");
5334 return false;
5337 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5338 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5339 if (maybe_ne (nunits_out, nunits_in))
5340 return false;
5342 op1 = gimple_assign_rhs2 (stmt);
5343 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5345 if (dump_enabled_p ())
5346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5347 "use not simple.\n");
5348 return false;
5351 /* Multiple types in SLP are handled by creating the appropriate number of
5352 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5353 case of SLP. */
5354 if (slp_node)
5355 ncopies = 1;
5356 else
5357 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5359 gcc_assert (ncopies >= 1);
5361 /* Determine whether the shift amount is a vector or a scalar. If the
5362 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5364 if ((dt[1] == vect_internal_def
5365 || dt[1] == vect_induction_def)
5366 && !slp_node)
5367 scalar_shift_arg = false;
5368 else if (dt[1] == vect_constant_def
5369 || dt[1] == vect_external_def
5370 || dt[1] == vect_internal_def)
5372 /* In SLP, we need to check whether the shift count is the same;
5373 in loops, if it is a constant or invariant, it is always
5374 a scalar shift. */
5375 if (slp_node)
5377 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5378 gimple *slpstmt;
5380 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5381 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5382 scalar_shift_arg = false;
5385 /* If the shift amount is computed by a pattern stmt, we cannot
5386 use the scalar amount directly; thus give up and use a vector
5387 shift. */
5388 if (dt[1] == vect_internal_def)
5390 gimple *def = SSA_NAME_DEF_STMT (op1);
5391 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5392 scalar_shift_arg = false;
5395 else
5397 if (dump_enabled_p ())
5398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5399 "operand mode requires invariant argument.\n");
5400 return false;
5403 /* Vector shifted by vector. */
5404 if (!scalar_shift_arg)
5406 optab = optab_for_tree_code (code, vectype, optab_vector);
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_NOTE, vect_location,
5409 "vector/vector shift/rotate found.\n");
5411 if (!op1_vectype)
5412 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5413 if (op1_vectype == NULL_TREE
5414 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5418 "unusable type for last operand in"
5419 " vector/vector shift/rotate.\n");
5420 return false;
5423 /* See if the machine has a vector shifted by scalar insn and, if not,
5424 see if it has a vector shifted by vector insn. */
5425 else
5427 optab = optab_for_tree_code (code, vectype, optab_scalar);
5428 if (optab
5429 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5431 if (dump_enabled_p ())
5432 dump_printf_loc (MSG_NOTE, vect_location,
5433 "vector/scalar shift/rotate found.\n");
5435 else
5437 optab = optab_for_tree_code (code, vectype, optab_vector);
5438 if (optab
5439 && (optab_handler (optab, TYPE_MODE (vectype))
5440 != CODE_FOR_nothing))
5442 scalar_shift_arg = false;
5444 if (dump_enabled_p ())
5445 dump_printf_loc (MSG_NOTE, vect_location,
5446 "vector/vector shift/rotate found.\n");
5448 /* Unlike the other binary operators, shifts/rotates have
5449 an int rhs rather than one of the same type as the lhs,
5450 so make sure the scalar is of the right type if we are
5451 dealing with vectors of long long/long/short/char. */
5452 if (dt[1] == vect_constant_def)
5453 op1 = fold_convert (TREE_TYPE (vectype), op1);
5454 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5455 TREE_TYPE (op1)))
5457 if (slp_node
5458 && TYPE_MODE (TREE_TYPE (vectype))
5459 != TYPE_MODE (TREE_TYPE (op1)))
5461 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5463 "unusable type for last operand in"
5464 " vector/vector shift/rotate.\n");
5465 return false;
5467 if (vec_stmt && !slp_node)
5469 op1 = fold_convert (TREE_TYPE (vectype), op1);
5470 op1 = vect_init_vector (stmt, op1,
5471 TREE_TYPE (vectype), NULL);
5478 /* Supportable by target? */
5479 if (!optab)
5481 if (dump_enabled_p ())
5482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5483 "no optab.\n");
5484 return false;
5486 vec_mode = TYPE_MODE (vectype);
5487 icode = (int) optab_handler (optab, vec_mode);
5488 if (icode == CODE_FOR_nothing)
5490 if (dump_enabled_p ())
5491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5492 "op not supported by target.\n");
5493 /* Check only during analysis. */
5494 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5495 || (!vec_stmt
5496 && !vect_worthwhile_without_simd_p (vinfo, code)))
5497 return false;
5498 if (dump_enabled_p ())
5499 dump_printf_loc (MSG_NOTE, vect_location,
5500 "proceeding using word mode.\n");
5503 /* Worthwhile without SIMD support? Check only during analysis. */
5504 if (!vec_stmt
5505 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5506 && !vect_worthwhile_without_simd_p (vinfo, code))
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5510 "not worthwhile without SIMD support.\n");
5511 return false;
5514 if (!vec_stmt) /* transformation not required. */
5516 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_NOTE, vect_location,
5519 "=== vectorizable_shift ===\n");
5520 if (!slp_node)
5521 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5522 return true;
5525 /* Transform. */
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_NOTE, vect_location,
5529 "transform binary/unary operation.\n");
5531 /* Handle def. */
5532 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5534 prev_stmt_info = NULL;
5535 for (j = 0; j < ncopies; j++)
5537 /* Handle uses. */
5538 if (j == 0)
5540 if (scalar_shift_arg)
5542 /* Vector shl and shr insn patterns can be defined with scalar
5543 operand 2 (shift operand). In this case, use a constant or
5544 loop-invariant op1 directly, without extending it to vector mode
5545 first. */
5546 optab_op2_mode = insn_data[icode].operand[2].mode;
5547 if (!VECTOR_MODE_P (optab_op2_mode))
5549 if (dump_enabled_p ())
5550 dump_printf_loc (MSG_NOTE, vect_location,
5551 "operand 1 using scalar mode.\n");
5552 vec_oprnd1 = op1;
5553 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5554 vec_oprnds1.quick_push (vec_oprnd1);
5555 if (slp_node)
5557 /* Store vec_oprnd1 for every vector stmt to be created
5558 for SLP_NODE. We check during the analysis that all
5559 the shift arguments are the same.
5560 TODO: Allow different constants for different vector
5561 stmts generated for an SLP instance. */
5562 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5563 vec_oprnds1.quick_push (vec_oprnd1);
5568 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5569 (a special case for certain kinds of vector shifts); otherwise,
5570 operand 1 should be of a vector type (the usual case). */
5571 if (vec_oprnd1)
5572 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5573 slp_node);
5574 else
5575 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5576 slp_node);
5578 else
5579 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5581 /* Arguments are ready. Create the new vector stmt. */
5582 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5584 vop1 = vec_oprnds1[i];
5585 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5586 new_temp = make_ssa_name (vec_dest, new_stmt);
5587 gimple_assign_set_lhs (new_stmt, new_temp);
5588 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5589 if (slp_node)
5590 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5593 if (slp_node)
5594 continue;
5596 if (j == 0)
5597 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5598 else
5599 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5600 prev_stmt_info = vinfo_for_stmt (new_stmt);
5603 vec_oprnds0.release ();
5604 vec_oprnds1.release ();
5606 return true;
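/* Editorial sketch, not part of the vectorizer: a source-level view of the
   two shift flavours distinguished by the scalar_shift_arg analysis above.
   The v4si typedef, the fixed width of 4 and the helper names are
   illustrative assumptions only; remainder iterations are ignored.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

void
shift_by_invariant (int *a, int n, int s)
{
  /* scalar_shift_arg == true: the invariant amount S feeds a
     vector/scalar shift insn (or is broadcast if only the
     vector/vector optab exists).  */
  for (int i = 0; i + 4 <= n; i += 4)
    {
      v4si v = { a[i], a[i + 1], a[i + 2], a[i + 3] };
      v = v << s;                       /* GCC broadcasts the scalar.  */
      a[i] = v[0], a[i + 1] = v[1], a[i + 2] = v[2], a[i + 3] = v[3];
    }
}

void
shift_by_vector (int *a, const int *b, int n)
{
  /* scalar_shift_arg == false: the amount varies per element, so the
     vector/vector shift optab is required.  */
  for (int i = 0; i + 4 <= n; i += 4)
    {
      v4si v = { a[i], a[i + 1], a[i + 2], a[i + 3] };
      v4si amt = { b[i], b[i + 1], b[i + 2], b[i + 3] };
      v = v << amt;
      a[i] = v[0], a[i + 1] = v[1], a[i + 2] = v[2], a[i + 3] = v[3];
    }
}
#endif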
5610 /* Function vectorizable_operation.
5612 Check if STMT performs a binary, unary or ternary operation that can
5613 be vectorized.
5614 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5615 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5616 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5618 static bool
5619 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5620 gimple **vec_stmt, slp_tree slp_node)
5622 tree vec_dest;
5623 tree scalar_dest;
5624 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5625 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5626 tree vectype;
5627 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5628 enum tree_code code, orig_code;
5629 machine_mode vec_mode;
5630 tree new_temp;
5631 int op_type;
5632 optab optab;
5633 bool target_support_p;
5634 gimple *def_stmt;
5635 enum vect_def_type dt[3]
5636 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5637 int ndts = 3;
5638 gimple *new_stmt = NULL;
5639 stmt_vec_info prev_stmt_info;
5640 poly_uint64 nunits_in;
5641 poly_uint64 nunits_out;
5642 tree vectype_out;
5643 int ncopies;
5644 int j, i;
5645 vec<tree> vec_oprnds0 = vNULL;
5646 vec<tree> vec_oprnds1 = vNULL;
5647 vec<tree> vec_oprnds2 = vNULL;
5648 tree vop0, vop1, vop2;
5649 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5650 vec_info *vinfo = stmt_info->vinfo;
5652 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5653 return false;
5655 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5656 && ! vec_stmt)
5657 return false;
5659 /* Is STMT a vectorizable binary/unary operation? */
5660 if (!is_gimple_assign (stmt))
5661 return false;
5663 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5664 return false;
5666 orig_code = code = gimple_assign_rhs_code (stmt);
5668 /* For pointer addition and subtraction, we should use the normal
5669 plus and minus for the vector operation. */
5670 if (code == POINTER_PLUS_EXPR)
5671 code = PLUS_EXPR;
5672 if (code == POINTER_DIFF_EXPR)
5673 code = MINUS_EXPR;
5675 /* Support only unary, binary or ternary operations. */
5676 op_type = TREE_CODE_LENGTH (code);
5677 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5679 if (dump_enabled_p ())
5680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5681 "num. args = %d (not unary/binary/ternary op).\n",
5682 op_type);
5683 return false;
5686 scalar_dest = gimple_assign_lhs (stmt);
5687 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5689 /* Most operations cannot handle bit-precision types without extra
5690 truncations. */
5691 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5692 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5693 /* Exceptions are bitwise binary operations. */
5694 && code != BIT_IOR_EXPR
5695 && code != BIT_XOR_EXPR
5696 && code != BIT_AND_EXPR)
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5700 "bit-precision arithmetic not supported.\n");
5701 return false;
5704 op0 = gimple_assign_rhs1 (stmt);
5705 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "use not simple.\n");
5710 return false;
5712 /* If op0 is an external or constant def, use a vector type with
5713 the same size as the output vector type. */
5714 if (!vectype)
5716 /* For a boolean type we cannot determine the vectype from an
5717 invariant value (we don't know whether it is a vector
5718 of booleans or a vector of integers). Use the output
5719 vectype because operations on booleans don't change the
5720 type. */
5721 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5723 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5727 "not supported operation on bool value.\n");
5728 return false;
5730 vectype = vectype_out;
5732 else
5733 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5735 if (vec_stmt)
5736 gcc_assert (vectype);
5737 if (!vectype)
5739 if (dump_enabled_p ())
5741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5742 "no vectype for scalar type ");
5743 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5744 TREE_TYPE (op0));
5745 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5748 return false;
5751 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5752 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5753 if (maybe_ne (nunits_out, nunits_in))
5754 return false;
5756 if (op_type == binary_op || op_type == ternary_op)
5758 op1 = gimple_assign_rhs2 (stmt);
5759 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5763 "use not simple.\n");
5764 return false;
5767 if (op_type == ternary_op)
5769 op2 = gimple_assign_rhs3 (stmt);
5770 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5772 if (dump_enabled_p ())
5773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5774 "use not simple.\n");
5775 return false;
5779 /* Multiple types in SLP are handled by creating the appropriate number of
5780 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5781 case of SLP. */
5782 if (slp_node)
5783 ncopies = 1;
5784 else
5785 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5787 gcc_assert (ncopies >= 1);
5789 /* Shifts are handled in vectorizable_shift (). */
5790 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5791 || code == RROTATE_EXPR)
5792 return false;
5794 /* Supportable by target? */
5796 vec_mode = TYPE_MODE (vectype);
5797 if (code == MULT_HIGHPART_EXPR)
5798 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5799 else
5801 optab = optab_for_tree_code (code, vectype, optab_default);
5802 if (!optab)
5804 if (dump_enabled_p ())
5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5806 "no optab.\n");
5807 return false;
5809 target_support_p = (optab_handler (optab, vec_mode)
5810 != CODE_FOR_nothing);
5813 if (!target_support_p)
5815 if (dump_enabled_p ())
5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5817 "op not supported by target.\n");
5818 /* Check only during analysis. */
5819 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5820 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5821 return false;
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_NOTE, vect_location,
5824 "proceeding using word mode.\n");
5827 /* Worthwhile without SIMD support? Check only during analysis. */
5828 if (!VECTOR_MODE_P (vec_mode)
5829 && !vec_stmt
5830 && !vect_worthwhile_without_simd_p (vinfo, code))
5832 if (dump_enabled_p ())
5833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5834 "not worthwhile without SIMD support.\n");
5835 return false;
5838 if (!vec_stmt) /* transformation not required. */
5840 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5841 if (dump_enabled_p ())
5842 dump_printf_loc (MSG_NOTE, vect_location,
5843 "=== vectorizable_operation ===\n");
5844 if (!slp_node)
5845 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5846 return true;
5849 /* Transform. */
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_NOTE, vect_location,
5853 "transform binary/unary operation.\n");
5855 /* Handle def. */
5856 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5858 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5859 vectors with unsigned elements, but the result is signed. So, we
5860 need to compute the MINUS_EXPR into a vectype temporary and
5861 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5862 tree vec_cvt_dest = NULL_TREE;
5863 if (orig_code == POINTER_DIFF_EXPR)
5864 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5866 /* In case the vectorization factor (VF) is bigger than the number
5867 of elements that we can fit in a vectype (nunits), we have to generate
5868 more than one vector stmt - i.e. - we need to "unroll" the
5869 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5870 from one copy of the vector stmt to the next, in the field
5871 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5872 stages to find the correct vector defs to be used when vectorizing
5873 stmts that use the defs of the current stmt. The example below
5874 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5875 we need to create 4 vectorized stmts):
5877 before vectorization:
5878 RELATED_STMT VEC_STMT
5879 S1: x = memref - -
5880 S2: z = x + 1 - -
5882 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5883 there):
5884 RELATED_STMT VEC_STMT
5885 VS1_0: vx0 = memref0 VS1_1 -
5886 VS1_1: vx1 = memref1 VS1_2 -
5887 VS1_2: vx2 = memref2 VS1_3 -
5888 VS1_3: vx3 = memref3 - -
5889 S1: x = load - VS1_0
5890 S2: z = x + 1 - -
5892 step2: vectorize stmt S2 (done here):
5893 To vectorize stmt S2 we first need to find the relevant vector
5894 def for the first operand 'x'. This is, as usual, obtained from
5895 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5896 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5897 relevant vector def 'vx0'. Having found 'vx0' we can generate
5898 the vector stmt VS2_0, and as usual, record it in the
5899 STMT_VINFO_VEC_STMT of stmt S2.
5900 When creating the second copy (VS2_1), we obtain the relevant vector
5901 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5902 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5903 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5904 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5905 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5906 chain of stmts and pointers:
5907 RELATED_STMT VEC_STMT
5908 VS1_0: vx0 = memref0 VS1_1 -
5909 VS1_1: vx1 = memref1 VS1_2 -
5910 VS1_2: vx2 = memref2 VS1_3 -
5911 VS1_3: vx3 = memref3 - -
5912 S1: x = load - VS1_0
5913 VS2_0: vz0 = vx0 + v1 VS2_1 -
5914 VS2_1: vz1 = vx1 + v1 VS2_2 -
5915 VS2_2: vz2 = vx2 + v1 VS2_3 -
5916 VS2_3: vz3 = vx3 + v1 - -
5917 S2: z = x + 1 - VS2_0 */
5919 prev_stmt_info = NULL;
5920 for (j = 0; j < ncopies; j++)
5922 /* Handle uses. */
5923 if (j == 0)
5925 if (op_type == binary_op)
5926 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5927 slp_node);
5928 else if (op_type == ternary_op)
5930 if (slp_node)
5932 auto_vec<tree> ops(3);
5933 ops.quick_push (op0);
5934 ops.quick_push (op1);
5935 ops.quick_push (op2);
5936 auto_vec<vec<tree> > vec_defs(3);
5937 vect_get_slp_defs (ops, slp_node, &vec_defs);
5938 vec_oprnds0 = vec_defs[0];
5939 vec_oprnds1 = vec_defs[1];
5940 vec_oprnds2 = vec_defs[2];
5942 else
5944 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5945 NULL);
5946 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5947 NULL);
5950 else
5951 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5952 slp_node);
5954 else
5956 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5957 if (op_type == ternary_op)
5959 tree vec_oprnd = vec_oprnds2.pop ();
5960 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5961 vec_oprnd));
5965 /* Arguments are ready. Create the new vector stmt. */
5966 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5968 vop1 = ((op_type == binary_op || op_type == ternary_op)
5969 ? vec_oprnds1[i] : NULL_TREE);
5970 vop2 = ((op_type == ternary_op)
5971 ? vec_oprnds2[i] : NULL_TREE);
5972 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5973 new_temp = make_ssa_name (vec_dest, new_stmt);
5974 gimple_assign_set_lhs (new_stmt, new_temp);
5975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5976 if (vec_cvt_dest)
5978 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5979 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5980 new_temp);
5981 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5982 gimple_assign_set_lhs (new_stmt, new_temp);
5983 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5985 if (slp_node)
5986 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5989 if (slp_node)
5990 continue;
5992 if (j == 0)
5993 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5994 else
5995 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5996 prev_stmt_info = vinfo_for_stmt (new_stmt);
5999 vec_oprnds0.release ();
6000 vec_oprnds1.release ();
6001 vec_oprnds2.release ();
6003 return true;
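/* Editorial sketch, not part of the vectorizer: what the POINTER_DIFF_EXPR
   handling above amounts to at the source level.  The pointer operands are
   subtracted as vectors of unsigned elements (MINUS_EXPR) and the result is
   reinterpreted as the signed result vector (VIEW_CONVERT_EXPR).  The
   typedefs, the width of 2 and the assumption of 64-bit pointers are
   illustrative only; remainder iterations are ignored.  */
#if 0
typedef unsigned long long v2du __attribute__ ((vector_size (16)));
typedef long long v2di __attribute__ ((vector_size (16)));

void
pointer_diffs (long long *d, char **p, char **q, int n)
{
  /* Scalar form:  d[i] = p[i] - q[i];  */
  for (int i = 0; i + 2 <= n; i += 2)
    {
      v2du vp = { (unsigned long long) p[i], (unsigned long long) p[i + 1] };
      v2du vq = { (unsigned long long) q[i], (unsigned long long) q[i + 1] };
      v2du diff = vp - vq;        /* MINUS_EXPR on unsigned elements.       */
      v2di res = (v2di) diff;     /* VIEW_CONVERT_EXPR to the signed type.  */
      d[i] = res[0];
      d[i + 1] = res[1];
    }
}
#endif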
6006 /* A helper function to ensure data reference DR's base alignment. */
6008 static void
6009 ensure_base_align (struct data_reference *dr)
6011 if (!dr->aux)
6012 return;
6014 if (DR_VECT_AUX (dr)->base_misaligned)
6016 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6018 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6020 if (decl_in_symtab_p (base_decl))
6021 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6022 else
6024 SET_DECL_ALIGN (base_decl, align_base_to);
6025 DECL_USER_ALIGN (base_decl) = 1;
6027 DR_VECT_AUX (dr)->base_misaligned = false;
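/* Editorial sketch: the source-level effect of ensure_base_align.  When the
   vectorizer needs, say, 16-byte-aligned accesses to a decl whose natural
   alignment is smaller, bumping DECL_ALIGN is equivalent to the declaration
   below.  The value 16 is an illustrative assumption; the real value comes
   from DR_TARGET_ALIGNMENT.  */
#if 0
/* Before the bump:   int data[256];                                     */
int data[256] __attribute__ ((aligned (16)));
#endif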
6032 /* Function get_group_alias_ptr_type.
6034 Return the alias type for the group starting at FIRST_STMT. */
6036 static tree
6037 get_group_alias_ptr_type (gimple *first_stmt)
6039 struct data_reference *first_dr, *next_dr;
6040 gimple *next_stmt;
6042 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6043 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6044 while (next_stmt)
6046 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6047 if (get_alias_set (DR_REF (first_dr))
6048 != get_alias_set (DR_REF (next_dr)))
6050 if (dump_enabled_p ())
6051 dump_printf_loc (MSG_NOTE, vect_location,
6052 "conflicting alias set types.\n");
6053 return ptr_type_node;
6055 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6057 return reference_alias_ptr_type (DR_REF (first_dr));
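/* Editorial sketch: a store group whose members have different alias sets.
   If the two stores below end up in one interleaved group, the alias set of
   int differs from that of float, so get_group_alias_ptr_type falls back to
   ptr_type_node for the whole group.  The struct layout is an illustrative
   assumption.  */
#if 0
struct pair { int i; float f; };

void
fill (struct pair *p, int n)
{
  for (int j = 0; j < n; j++)
    {
      p[j].i = j;              /* alias set of int    */
      p[j].f = 1.0f;           /* alias set of float  */
    }
}
#endif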
6061 /* Function vectorizable_store.
6063 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6064 can be vectorized.
6065 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6066 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6067 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6069 static bool
6070 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6071 slp_tree slp_node)
6073 tree data_ref;
6074 tree op;
6075 tree vec_oprnd = NULL_TREE;
6076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6077 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6078 tree elem_type;
6079 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6080 struct loop *loop = NULL;
6081 machine_mode vec_mode;
6082 tree dummy;
6083 enum dr_alignment_support alignment_support_scheme;
6084 gimple *def_stmt;
6085 enum vect_def_type rhs_dt = vect_unknown_def_type;
6086 enum vect_def_type mask_dt = vect_unknown_def_type;
6087 stmt_vec_info prev_stmt_info = NULL;
6088 tree dataref_ptr = NULL_TREE;
6089 tree dataref_offset = NULL_TREE;
6090 gimple *ptr_incr = NULL;
6091 int ncopies;
6092 int j;
6093 gimple *next_stmt, *first_stmt;
6094 bool grouped_store;
6095 unsigned int group_size, i;
6096 vec<tree> oprnds = vNULL;
6097 vec<tree> result_chain = vNULL;
6098 bool inv_p;
6099 tree offset = NULL_TREE;
6100 vec<tree> vec_oprnds = vNULL;
6101 bool slp = (slp_node != NULL);
6102 unsigned int vec_num;
6103 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6104 vec_info *vinfo = stmt_info->vinfo;
6105 tree aggr_type;
6106 gather_scatter_info gs_info;
6107 gimple *new_stmt;
6108 poly_uint64 vf;
6109 vec_load_store_type vls_type;
6110 tree ref_type;
6112 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6113 return false;
6115 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6116 && ! vec_stmt)
6117 return false;
6119 /* Is vectorizable store? */
6121 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6122 if (is_gimple_assign (stmt))
6124 tree scalar_dest = gimple_assign_lhs (stmt);
6125 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6126 && is_pattern_stmt_p (stmt_info))
6127 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6128 if (TREE_CODE (scalar_dest) != ARRAY_REF
6129 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6130 && TREE_CODE (scalar_dest) != INDIRECT_REF
6131 && TREE_CODE (scalar_dest) != COMPONENT_REF
6132 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6133 && TREE_CODE (scalar_dest) != REALPART_EXPR
6134 && TREE_CODE (scalar_dest) != MEM_REF)
6135 return false;
6137 else
6139 gcall *call = dyn_cast <gcall *> (stmt);
6140 if (!call || !gimple_call_internal_p (call))
6141 return false;
6143 internal_fn ifn = gimple_call_internal_fn (call);
6144 if (!internal_store_fn_p (ifn))
6145 return false;
6147 if (slp_node != NULL)
6149 if (dump_enabled_p ())
6150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6151 "SLP of masked stores not supported.\n");
6152 return false;
6155 int mask_index = internal_fn_mask_index (ifn);
6156 if (mask_index >= 0)
6158 mask = gimple_call_arg (call, mask_index);
6159 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6160 &mask_vectype))
6161 return false;
6165 op = vect_get_store_rhs (stmt);
6167 /* Cannot have hybrid store SLP -- that would mean storing to the
6168 same location twice. */
6169 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6171 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6172 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6174 if (loop_vinfo)
6176 loop = LOOP_VINFO_LOOP (loop_vinfo);
6177 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6179 else
6180 vf = 1;
6182 /* Multiple types in SLP are handled by creating the appropriate number of
6183 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6184 case of SLP. */
6185 if (slp)
6186 ncopies = 1;
6187 else
6188 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6190 gcc_assert (ncopies >= 1);
6192 /* FORNOW. This restriction should be relaxed. */
6193 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6195 if (dump_enabled_p ())
6196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6197 "multiple types in nested loop.\n");
6198 return false;
6201 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6202 return false;
6204 elem_type = TREE_TYPE (vectype);
6205 vec_mode = TYPE_MODE (vectype);
6207 if (!STMT_VINFO_DATA_REF (stmt_info))
6208 return false;
6210 vect_memory_access_type memory_access_type;
6211 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6212 &memory_access_type, &gs_info))
6213 return false;
6215 if (mask)
6217 if (memory_access_type == VMAT_CONTIGUOUS)
6219 if (!VECTOR_MODE_P (vec_mode)
6220 || !can_vec_mask_load_store_p (vec_mode,
6221 TYPE_MODE (mask_vectype), false))
6222 return false;
6224 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6225 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6227 if (dump_enabled_p ())
6228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6229 "unsupported access type for masked store.\n");
6230 return false;
6233 else
6235 /* FORNOW. In some cases we can vectorize even if the data-type is
6236 not supported (e.g. array initialization with 0). */
6237 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6238 return false;
6241 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6242 && memory_access_type != VMAT_GATHER_SCATTER
6243 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6244 if (grouped_store)
6246 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6247 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6248 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6250 else
6252 first_stmt = stmt;
6253 first_dr = dr;
6254 group_size = vec_num = 1;
6257 if (!vec_stmt) /* transformation not required. */
6259 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6261 if (loop_vinfo
6262 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6263 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6264 memory_access_type, &gs_info);
6266 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6267 /* The SLP costs are calculated during SLP analysis. */
6268 if (!slp_node)
6269 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6270 vls_type, NULL, NULL, NULL);
6271 return true;
6273 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6275 /* Transform. */
6277 ensure_base_align (dr);
6279 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6281 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6282 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6283 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6284 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6285 edge pe = loop_preheader_edge (loop);
6286 gimple_seq seq;
6287 basic_block new_bb;
6288 enum { NARROW, NONE, WIDEN } modifier;
6289 poly_uint64 scatter_off_nunits
6290 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6292 if (known_eq (nunits, scatter_off_nunits))
6293 modifier = NONE;
6294 else if (known_eq (nunits * 2, scatter_off_nunits))
6296 modifier = WIDEN;
6298 /* Currently gathers and scatters are only supported for
6299 fixed-length vectors. */
6300 unsigned int count = scatter_off_nunits.to_constant ();
6301 vec_perm_builder sel (count, count, 1);
6302 for (i = 0; i < (unsigned int) count; ++i)
6303 sel.quick_push (i | (count / 2));
6305 vec_perm_indices indices (sel, 1, count);
6306 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6307 indices);
6308 gcc_assert (perm_mask != NULL_TREE);
6310 else if (known_eq (nunits, scatter_off_nunits * 2))
6312 modifier = NARROW;
6314 /* Currently gathers and scatters are only supported for
6315 fixed-length vectors. */
6316 unsigned int count = nunits.to_constant ();
6317 vec_perm_builder sel (count, count, 1);
6318 for (i = 0; i < (unsigned int) count; ++i)
6319 sel.quick_push (i | (count / 2));
6321 vec_perm_indices indices (sel, 2, count);
6322 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6323 gcc_assert (perm_mask != NULL_TREE);
6324 ncopies *= 2;
6326 else
6327 gcc_unreachable ();
6329 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6330 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6331 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6332 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6333 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6334 scaletype = TREE_VALUE (arglist);
6336 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6337 && TREE_CODE (rettype) == VOID_TYPE);
6339 ptr = fold_convert (ptrtype, gs_info.base);
6340 if (!is_gimple_min_invariant (ptr))
6342 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6343 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6344 gcc_assert (!new_bb);
6347 /* Currently we support only unconditional scatter stores,
6348 so mask should be all ones. */
6349 mask = build_int_cst (masktype, -1);
6350 mask = vect_init_vector (stmt, mask, masktype, NULL);
6352 scale = build_int_cst (scaletype, gs_info.scale);
6354 prev_stmt_info = NULL;
6355 for (j = 0; j < ncopies; ++j)
6357 if (j == 0)
6359 src = vec_oprnd1
6360 = vect_get_vec_def_for_operand (op, stmt);
6361 op = vec_oprnd0
6362 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6364 else if (modifier != NONE && (j & 1))
6366 if (modifier == WIDEN)
6368 src = vec_oprnd1
6369 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6370 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6371 stmt, gsi);
6373 else if (modifier == NARROW)
6375 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6376 stmt, gsi);
6377 op = vec_oprnd0
6378 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6379 vec_oprnd0);
6381 else
6382 gcc_unreachable ();
6384 else
6386 src = vec_oprnd1
6387 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6388 op = vec_oprnd0
6389 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6390 vec_oprnd0);
6393 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6395 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6396 TYPE_VECTOR_SUBPARTS (srctype)));
6397 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6398 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6399 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6401 src = var;
6404 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6406 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6407 TYPE_VECTOR_SUBPARTS (idxtype)));
6408 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6409 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6410 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6412 op = var;
6415 new_stmt
6416 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6420 if (prev_stmt_info == NULL)
6421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6422 else
6423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6424 prev_stmt_info = vinfo_for_stmt (new_stmt);
6426 return true;
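/* Editorial sketch: the kind of loop the builtin scatter path above targets.
   Each vector of values is stored through a vector of indices, which is
   lowered to the target's scatter-store builtin (gs_info.decl) on targets
   that provide one.  The element types are illustrative assumptions.  */
#if 0
void
scatter (int *restrict base, const int *restrict idx,
         const int *restrict val, int n)
{
  for (int i = 0; i < n; i++)
    base[idx[i]] = val[i];     /* indexed (scatter) store  */
}
#endif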
6429 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6431 gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6432 GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6435 if (grouped_store)
6437 /* FORNOW */
6438 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6440 /* We vectorize all the stmts of the interleaving group when we
6441 reach the last stmt in the group. */
6442 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6443 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6444 && !slp)
6446 *vec_stmt = NULL;
6447 return true;
6450 if (slp)
6452 grouped_store = false;
6453 /* VEC_NUM is the number of vect stmts to be created for this
6454 group. */
6455 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6456 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6457 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6458 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6459 op = vect_get_store_rhs (first_stmt);
6461 else
6462 /* VEC_NUM is the number of vect stmts to be created for this
6463 group. */
6464 vec_num = group_size;
6466 ref_type = get_group_alias_ptr_type (first_stmt);
6468 else
6469 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6471 if (dump_enabled_p ())
6472 dump_printf_loc (MSG_NOTE, vect_location,
6473 "transform store. ncopies = %d\n", ncopies);
6475 if (memory_access_type == VMAT_ELEMENTWISE
6476 || memory_access_type == VMAT_STRIDED_SLP)
6478 gimple_stmt_iterator incr_gsi;
6479 bool insert_after;
6480 gimple *incr;
6481 tree offvar;
6482 tree ivstep;
6483 tree running_off;
6484 tree stride_base, stride_step, alias_off;
6485 tree vec_oprnd;
6486 unsigned int g;
6487 /* Checked by get_load_store_type. */
6488 unsigned int const_nunits = nunits.to_constant ();
6490 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6491 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6493 stride_base
6494 = fold_build_pointer_plus
6495 (DR_BASE_ADDRESS (first_dr),
6496 size_binop (PLUS_EXPR,
6497 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6498 convert_to_ptrofftype (DR_INIT (first_dr))));
6499 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6501 /* For a store with loop-invariant (but other than power-of-2)
6502 stride (i.e. not a grouped access) like so:
6504 for (i = 0; i < n; i += stride)
6505 array[i] = ...;
6507 we generate a new induction variable and new stores from
6508 the components of the (vectorized) rhs:
6510 for (j = 0; ; j += VF*stride)
6511 vectemp = ...;
6512 tmp1 = vectemp[0];
6513 array[j] = tmp1;
6514 tmp2 = vectemp[1];
6515 array[j + stride] = tmp2;
6519 unsigned nstores = const_nunits;
6520 unsigned lnel = 1;
6521 tree ltype = elem_type;
6522 tree lvectype = vectype;
6523 if (slp)
6525 if (group_size < const_nunits
6526 && const_nunits % group_size == 0)
6528 nstores = const_nunits / group_size;
6529 lnel = group_size;
6530 ltype = build_vector_type (elem_type, group_size);
6531 lvectype = vectype;
6533 /* First check if vec_extract optab doesn't support extraction
6534 of vector elts directly. */
6535 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6536 machine_mode vmode;
6537 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6538 || !VECTOR_MODE_P (vmode)
6539 || !targetm.vector_mode_supported_p (vmode)
6540 || (convert_optab_handler (vec_extract_optab,
6541 TYPE_MODE (vectype), vmode)
6542 == CODE_FOR_nothing))
6544 /* Try to avoid emitting an extract of vector elements
6545 by performing the extracts using an integer type of the
6546 same size, extracting from a vector of those and then
6547 re-interpreting it as the original vector type if
6548 supported. */
6549 unsigned lsize
6550 = group_size * GET_MODE_BITSIZE (elmode);
6551 elmode = int_mode_for_size (lsize, 0).require ();
6552 unsigned int lnunits = const_nunits / group_size;
6553 /* If we can't construct such a vector fall back to
6554 element extracts from the original vector type and
6555 element size stores. */
6556 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6557 && VECTOR_MODE_P (vmode)
6558 && targetm.vector_mode_supported_p (vmode)
6559 && (convert_optab_handler (vec_extract_optab,
6560 vmode, elmode)
6561 != CODE_FOR_nothing))
6563 nstores = lnunits;
6564 lnel = group_size;
6565 ltype = build_nonstandard_integer_type (lsize, 1);
6566 lvectype = build_vector_type (ltype, nstores);
6568 /* Else fall back to vector extraction anyway.
6569 Fewer stores are more important than avoiding spilling
6570 of the vector we extract from. Compared to the
6571 construction case in vectorizable_load, no store-forwarding
6572 issue exists here for reasonable archs. */
6575 else if (group_size >= const_nunits
6576 && group_size % const_nunits == 0)
6578 nstores = 1;
6579 lnel = const_nunits;
6580 ltype = vectype;
6581 lvectype = vectype;
6583 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6584 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6587 ivstep = stride_step;
6588 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6589 build_int_cst (TREE_TYPE (ivstep), vf));
6591 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6593 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6594 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6595 create_iv (stride_base, ivstep, NULL,
6596 loop, &incr_gsi, insert_after,
6597 &offvar, NULL);
6598 incr = gsi_stmt (incr_gsi);
6599 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6601 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6603 prev_stmt_info = NULL;
6604 alias_off = build_int_cst (ref_type, 0);
6605 next_stmt = first_stmt;
6606 for (g = 0; g < group_size; g++)
6608 running_off = offvar;
6609 if (g)
6611 tree size = TYPE_SIZE_UNIT (ltype);
6612 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6613 size);
6614 tree newoff = copy_ssa_name (running_off, NULL);
6615 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6616 running_off, pos);
6617 vect_finish_stmt_generation (stmt, incr, gsi);
6618 running_off = newoff;
6620 unsigned int group_el = 0;
6621 unsigned HOST_WIDE_INT
6622 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6623 for (j = 0; j < ncopies; j++)
6625 /* We've set op and dt above, from vect_get_store_rhs,
6626 and first_stmt == stmt. */
6627 if (j == 0)
6629 if (slp)
6631 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6632 slp_node);
6633 vec_oprnd = vec_oprnds[0];
6635 else
6637 op = vect_get_store_rhs (next_stmt);
6638 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6641 else
6643 if (slp)
6644 vec_oprnd = vec_oprnds[j];
6645 else
6647 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6648 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6649 vec_oprnd);
6652 /* Pun the vector to extract from if necessary. */
6653 if (lvectype != vectype)
6655 tree tem = make_ssa_name (lvectype);
6656 gimple *pun
6657 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6658 lvectype, vec_oprnd));
6659 vect_finish_stmt_generation (stmt, pun, gsi);
6660 vec_oprnd = tem;
6662 for (i = 0; i < nstores; i++)
6664 tree newref, newoff;
6665 gimple *incr, *assign;
6666 tree size = TYPE_SIZE (ltype);
6667 /* Extract the i'th component. */
6668 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6669 bitsize_int (i), size);
6670 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6671 size, pos);
6673 elem = force_gimple_operand_gsi (gsi, elem, true,
6674 NULL_TREE, true,
6675 GSI_SAME_STMT);
6677 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6678 group_el * elsz);
6679 newref = build2 (MEM_REF, ltype,
6680 running_off, this_off);
6681 vect_copy_ref_info (newref, DR_REF (first_dr));
6683 /* And store it to *running_off. */
6684 assign = gimple_build_assign (newref, elem);
6685 vect_finish_stmt_generation (stmt, assign, gsi);
6687 group_el += lnel;
6688 if (! slp
6689 || group_el == group_size)
6691 newoff = copy_ssa_name (running_off, NULL);
6692 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6693 running_off, stride_step);
6694 vect_finish_stmt_generation (stmt, incr, gsi);
6696 running_off = newoff;
6697 group_el = 0;
6699 if (g == group_size - 1
6700 && !slp)
6702 if (j == 0 && i == 0)
6703 STMT_VINFO_VEC_STMT (stmt_info)
6704 = *vec_stmt = assign;
6705 else
6706 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6707 prev_stmt_info = vinfo_for_stmt (assign);
6711 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6712 if (slp)
6713 break;
6716 vec_oprnds.release ();
6717 return true;
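/* Editorial sketch of the elementwise strided-store lowering handled in the
   branch above (see the comment introducing it): the vectorized rhs is
   computed per transformed iteration and its lanes are stored one by one at
   stride-spaced addresses.  The v4si typedef, the width of 4 and the rhs
   "b[j] + 1" are illustrative assumptions; remainder iterations are
   ignored.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

void
strided_store (int *array, const int *b, int n, int stride)
{
  /* Scalar form:  for (i = 0, j = 0; i < n; i += stride, j++)
                     array[i] = b[j] + 1;                         */
  int j = 0;
  for (int i = 0; i + 3 * stride < n; i += 4 * stride, j += 4)
    {
      v4si vectemp = { b[j] + 1, b[j + 1] + 1, b[j + 2] + 1, b[j + 3] + 1 };
      array[i]              = vectemp[0];
      array[i + stride]     = vectemp[1];
      array[i + 2 * stride] = vectemp[2];
      array[i + 3 * stride] = vectemp[3];
    }
}
#endif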
6720 auto_vec<tree> dr_chain (group_size);
6721 oprnds.create (group_size);
6723 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6724 gcc_assert (alignment_support_scheme);
6725 vec_loop_masks *loop_masks
6726 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6727 ? &LOOP_VINFO_MASKS (loop_vinfo)
6728 : NULL);
6729 /* Targets with store-lane instructions must not require explicit
6730 realignment. vect_supportable_dr_alignment always returns either
6731 dr_aligned or dr_unaligned_supported for masked operations. */
6732 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6733 && !mask
6734 && !loop_masks)
6735 || alignment_support_scheme == dr_aligned
6736 || alignment_support_scheme == dr_unaligned_supported);
6738 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6739 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6740 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6742 tree bump;
6743 tree vec_offset = NULL_TREE;
6744 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6746 aggr_type = NULL_TREE;
6747 bump = NULL_TREE;
6749 else if (memory_access_type == VMAT_GATHER_SCATTER)
6751 aggr_type = elem_type;
6752 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6753 &bump, &vec_offset);
6755 else
6757 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6758 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6759 else
6760 aggr_type = vectype;
6761 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6764 if (mask)
6765 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6767 /* In case the vectorization factor (VF) is bigger than the number
6768 of elements that we can fit in a vectype (nunits), we have to generate
6769 more than one vector stmt - i.e. - we need to "unroll" the
6770 vector stmt by a factor VF/nunits. For more details see the documentation of
6771 vect_get_vec_def_for_stmt_copy. */
6773 /* In case of interleaving (non-unit grouped access):
6775 S1: &base + 2 = x2
6776 S2: &base = x0
6777 S3: &base + 1 = x1
6778 S4: &base + 3 = x3
6780 We create vectorized stores starting from base address (the access of the
6781 first stmt in the chain (S2 in the above example), when the last store stmt
6782 of the chain (S4) is reached:
6784 VS1: &base = vx2
6785 VS2: &base + vec_size*1 = vx0
6786 VS3: &base + vec_size*2 = vx1
6787 VS4: &base + vec_size*3 = vx3
6789 Then permutation statements are generated:
6791 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6792 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6795 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6796 (the order of the data-refs in the output of vect_permute_store_chain
6797 corresponds to the order of scalar stmts in the interleaving chain - see
6798 the documentation of vect_permute_store_chain()).
6800 In case of both multiple types and interleaving, above vector stores and
6801 permutation stmts are created for every copy. The result vector stmts are
6802 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6803 STMT_VINFO_RELATED_STMT for the next copies.
6806 prev_stmt_info = NULL;
6807 tree vec_mask = NULL_TREE;
6808 for (j = 0; j < ncopies; j++)
6811 if (j == 0)
6813 if (slp)
6815 /* Get vectorized arguments for SLP_NODE. */
6816 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6817 NULL, slp_node);
6819 vec_oprnd = vec_oprnds[0];
6821 else
6823 /* For interleaved stores we collect vectorized defs for all the
6824 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6825 used as an input to vect_permute_store_chain(), and OPRNDS as
6826 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6828 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6829 OPRNDS are of size 1. */
6830 next_stmt = first_stmt;
6831 for (i = 0; i < group_size; i++)
6833 /* Since gaps are not supported for interleaved stores,
6834 GROUP_SIZE is the exact number of stmts in the chain.
6835 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6836 there is no interleaving, GROUP_SIZE is 1, and only one
6837 iteration of the loop will be executed. */
6838 op = vect_get_store_rhs (next_stmt);
6839 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6840 dr_chain.quick_push (vec_oprnd);
6841 oprnds.quick_push (vec_oprnd);
6842 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6844 if (mask)
6845 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6846 mask_vectype);
6849 /* We should have caught mismatched types earlier. */
6850 gcc_assert (useless_type_conversion_p (vectype,
6851 TREE_TYPE (vec_oprnd)));
6852 bool simd_lane_access_p
6853 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6854 if (simd_lane_access_p
6855 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6856 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6857 && integer_zerop (DR_OFFSET (first_dr))
6858 && integer_zerop (DR_INIT (first_dr))
6859 && alias_sets_conflict_p (get_alias_set (aggr_type),
6860 get_alias_set (TREE_TYPE (ref_type))))
6862 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6863 dataref_offset = build_int_cst (ref_type, 0);
6864 inv_p = false;
6866 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6868 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6869 &dataref_ptr, &vec_offset);
6870 inv_p = false;
6872 else
6873 dataref_ptr
6874 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6875 simd_lane_access_p ? loop : NULL,
6876 offset, &dummy, gsi, &ptr_incr,
6877 simd_lane_access_p, &inv_p,
6878 NULL_TREE, bump);
6879 gcc_assert (bb_vinfo || !inv_p);
6881 else
6883 /* For interleaved stores we created vectorized defs for all the
6884 defs stored in OPRNDS in the previous iteration (previous copy).
6885 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6886 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6887 next copy.
6888 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6889 OPRNDS are of size 1. */
6890 for (i = 0; i < group_size; i++)
6892 op = oprnds[i];
6893 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6894 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6895 dr_chain[i] = vec_oprnd;
6896 oprnds[i] = vec_oprnd;
6898 if (mask)
6899 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6900 if (dataref_offset)
6901 dataref_offset
6902 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6903 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6904 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6905 vec_offset);
6906 else
6907 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6908 bump);
6911 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6913 tree vec_array;
6915 /* Get an array into which we can store the individual vectors. */
6916 vec_array = create_vector_array (vectype, vec_num);
6918 /* Invalidate the current contents of VEC_ARRAY. This should
6919 become an RTL clobber too, which prevents the vector registers
6920 from being upward-exposed. */
6921 vect_clobber_variable (stmt, gsi, vec_array);
6923 /* Store the individual vectors into the array. */
6924 for (i = 0; i < vec_num; i++)
6926 vec_oprnd = dr_chain[i];
6927 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6930 tree final_mask = NULL;
6931 if (loop_masks)
6932 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6933 vectype, j);
6934 if (vec_mask)
6935 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6936 vec_mask, gsi);
6938 gcall *call;
6939 if (final_mask)
6941 /* Emit:
6942 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6943 VEC_ARRAY). */
6944 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6945 tree alias_ptr = build_int_cst (ref_type, align);
6946 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6947 dataref_ptr, alias_ptr,
6948 final_mask, vec_array);
6950 else
6952 /* Emit:
6953 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6954 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6955 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6956 vec_array);
6957 gimple_call_set_lhs (call, data_ref);
6959 gimple_call_set_nothrow (call, true);
6960 new_stmt = call;
6961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6963 /* Record that VEC_ARRAY is now dead. */
6964 vect_clobber_variable (stmt, gsi, vec_array);
6966 else
6968 new_stmt = NULL;
6969 if (grouped_store)
6971 if (j == 0)
6972 result_chain.create (group_size);
6973 /* Permute. */
6974 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6975 &result_chain);
6978 next_stmt = first_stmt;
6979 for (i = 0; i < vec_num; i++)
6981 unsigned align, misalign;
6983 tree final_mask = NULL_TREE;
6984 if (loop_masks)
6985 final_mask = vect_get_loop_mask (gsi, loop_masks,
6986 vec_num * ncopies,
6987 vectype, vec_num * j + i);
6988 if (vec_mask)
6989 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6990 vec_mask, gsi);
6992 if (memory_access_type == VMAT_GATHER_SCATTER)
6994 tree scale = size_int (gs_info.scale);
6995 gcall *call;
6996 if (loop_masks)
6997 call = gimple_build_call_internal
6998 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
6999 scale, vec_oprnd, final_mask);
7000 else
7001 call = gimple_build_call_internal
7002 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7003 scale, vec_oprnd);
7004 gimple_call_set_nothrow (call, true);
7005 new_stmt = call;
7006 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7007 break;
7010 if (i > 0)
7011 /* Bump the vector pointer. */
7012 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7013 stmt, bump);
7015 if (slp)
7016 vec_oprnd = vec_oprnds[i];
7017 else if (grouped_store)
7018 /* For grouped stores vectorized defs are interleaved in
7019 vect_permute_store_chain(). */
7020 vec_oprnd = result_chain[i];
7022 align = DR_TARGET_ALIGNMENT (first_dr);
7023 if (aligned_access_p (first_dr))
7024 misalign = 0;
7025 else if (DR_MISALIGNMENT (first_dr) == -1)
7027 align = dr_alignment (vect_dr_behavior (first_dr));
7028 misalign = 0;
7030 else
7031 misalign = DR_MISALIGNMENT (first_dr);
7032 if (dataref_offset == NULL_TREE
7033 && TREE_CODE (dataref_ptr) == SSA_NAME)
7034 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7035 misalign);
7037 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7039 tree perm_mask = perm_mask_for_reverse (vectype);
7040 tree perm_dest
7041 = vect_create_destination_var (vect_get_store_rhs (stmt),
7042 vectype);
7043 tree new_temp = make_ssa_name (perm_dest);
7045 /* Generate the permute statement. */
7046 gimple *perm_stmt
7047 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7048 vec_oprnd, perm_mask);
7049 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7051 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7052 vec_oprnd = new_temp;
7055 /* Arguments are ready. Create the new vector stmt. */
7056 if (final_mask)
7058 align = least_bit_hwi (misalign | align);
7059 tree ptr = build_int_cst (ref_type, align);
7060 gcall *call
7061 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7062 dataref_ptr, ptr,
7063 final_mask, vec_oprnd);
7064 gimple_call_set_nothrow (call, true);
7065 new_stmt = call;
7067 else
7069 data_ref = fold_build2 (MEM_REF, vectype,
7070 dataref_ptr,
7071 dataref_offset
7072 ? dataref_offset
7073 : build_int_cst (ref_type, 0));
7074 if (aligned_access_p (first_dr))
7076 else if (DR_MISALIGNMENT (first_dr) == -1)
7077 TREE_TYPE (data_ref)
7078 = build_aligned_type (TREE_TYPE (data_ref),
7079 align * BITS_PER_UNIT);
7080 else
7081 TREE_TYPE (data_ref)
7082 = build_aligned_type (TREE_TYPE (data_ref),
7083 TYPE_ALIGN (elem_type));
7084 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7085 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7089 if (slp)
7090 continue;
7092 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7093 if (!next_stmt)
7094 break;
7097 if (!slp)
7099 if (j == 0)
7100 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7101 else
7102 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7103 prev_stmt_info = vinfo_for_stmt (new_stmt);
7107 oprnds.release ();
7108 result_chain.release ();
7109 vec_oprnds.release ();
7111 return true;
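/* Editorial sketch, not part of the vectorizer: a source-level view of the
   interleaved store case described in the comment before the transform loop
   above.  The two scalar stores per iteration form one group; the vectorizer
   gathers one vector per group member, interleaves them with VEC_PERM_EXPRs
   and emits contiguous vector stores.  The struct layout, the group size of
   2 and the vector width of 4 are illustrative assumptions;
   __builtin_shuffle stands in for VEC_PERM_EXPR.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));
struct point { int x; int y; };

void
store_group (struct point *p, const int *a, const int *b, int n)
{
  /* Scalar form:  for (i = 0; i < n; i++) { p[i].x = a[i]; p[i].y = b[i]; } */
  for (int i = 0; i + 4 <= n; i += 4)
    {
      v4si vx = { a[i], a[i + 1], a[i + 2], a[i + 3] };
      v4si vy = { b[i], b[i + 1], b[i + 2], b[i + 3] };
      v4si lo_sel = { 0, 4, 1, 5 }, hi_sel = { 2, 6, 3, 7 };
      v4si lo = __builtin_shuffle (vx, vy, lo_sel);   /* x0 y0 x1 y1 */
      v4si hi = __builtin_shuffle (vx, vy, hi_sel);   /* x2 y2 x3 y3 */
      __builtin_memcpy (&p[i], &lo, sizeof lo);
      __builtin_memcpy (&p[i + 2], &hi, sizeof hi);
    }
}
#endif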
7114 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7115 VECTOR_CST mask. No checks are made that the target platform supports the
7116 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7117 vect_gen_perm_mask_checked. */
7119 tree
7120 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7122 tree mask_type;
7124 poly_uint64 nunits = sel.length ();
7125 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7127 mask_type = build_vector_type (ssizetype, nunits);
7128 return vec_perm_indices_to_tree (mask_type, sel);
7131 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7132 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7134 tree
7135 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7137 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7138 return vect_gen_perm_mask_any (vectype, sel);
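/* Editorial sketch: how these helpers are typically driven, mirroring the
   vec_perm_builder/vec_perm_indices usage in the scatter code of
   vectorizable_store above.  Here the selector reverses a vector; VECTYPE is
   assumed to be a fixed-width vector type already in scope and COUNT its
   (illustrative) element count.  */
#if 0
  unsigned int count = 4;
  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    sel.quick_push (count - 1 - i);
  vec_perm_indices indices (sel, 1, count);
  tree reverse_mask = vect_gen_perm_mask_checked (vectype, indices);
#endif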
7141 /* Given vector variables X and Y that were generated for the scalar
7142 STMT, generate instructions to permute the vector elements of X and Y
7143 using permutation mask MASK_VEC, insert them at *GSI and return the
7144 permuted vector variable. */
7146 static tree
7147 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7148 gimple_stmt_iterator *gsi)
7150 tree vectype = TREE_TYPE (x);
7151 tree perm_dest, data_ref;
7152 gimple *perm_stmt;
7154 tree scalar_dest = gimple_get_lhs (stmt);
7155 if (TREE_CODE (scalar_dest) == SSA_NAME)
7156 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7157 else
7158 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7159 data_ref = make_ssa_name (perm_dest);
7161 /* Generate the permute statement. */
7162 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7163 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7165 return data_ref;
7168 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7169 inserting them on the loop's preheader edge. Returns true if we
7170 were successful in doing so (and thus STMT can be moved then),
7171 otherwise returns false. */
7173 static bool
7174 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7176 ssa_op_iter i;
7177 tree op;
7178 bool any = false;
7180 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7182 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7183 if (!gimple_nop_p (def_stmt)
7184 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7186 /* Make sure we don't need to recurse. While we could do
7187 so in simple cases, when there are more complex use webs
7188 we don't have an easy way to preserve stmt order to fulfil
7189 dependencies within them. */
7190 tree op2;
7191 ssa_op_iter i2;
7192 if (gimple_code (def_stmt) == GIMPLE_PHI)
7193 return false;
7194 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7196 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7197 if (!gimple_nop_p (def_stmt2)
7198 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7199 return false;
7201 any = true;
7205 if (!any)
7206 return true;
7208 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7210 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7211 if (!gimple_nop_p (def_stmt)
7212 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7214 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7215 gsi_remove (&gsi, false);
7216 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7220 return true;
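/* Editorial sketch: the situation hoist_defs_of_uses prepares for.  When
   vectorizable_load wants to hoist an invariant load such as *q below out of
   the loop, the statements defining the load's SSA uses (the address
   computation) must be movable to the preheader, and must not themselves
   depend on anything defined inside the loop.  Names are illustrative.  */
#if 0
void
invariant_load (int *a, int *base, int off, int n)
{
  for (int i = 0; i < n; i++)
    {
      int *q = base + off;     /* def of a use of the load; hoistable  */
      a[i] = *q;               /* loop-invariant load                  */
    }
}
#endif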
7223 /* vectorizable_load.
7225 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7226 can be vectorized.
7227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7228 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7231 static bool
7232 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7233 slp_tree slp_node, slp_instance slp_node_instance)
7235 tree scalar_dest;
7236 tree vec_dest = NULL;
7237 tree data_ref = NULL;
7238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7239 stmt_vec_info prev_stmt_info;
7240 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7241 struct loop *loop = NULL;
7242 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7243 bool nested_in_vect_loop = false;
7244 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7245 tree elem_type;
7246 tree new_temp;
7247 machine_mode mode;
7248 gimple *new_stmt = NULL;
7249 tree dummy;
7250 enum dr_alignment_support alignment_support_scheme;
7251 tree dataref_ptr = NULL_TREE;
7252 tree dataref_offset = NULL_TREE;
7253 gimple *ptr_incr = NULL;
7254 int ncopies;
7255 int i, j;
7256 unsigned int group_size;
7257 poly_uint64 group_gap_adj;
7258 tree msq = NULL_TREE, lsq;
7259 tree offset = NULL_TREE;
7260 tree byte_offset = NULL_TREE;
7261 tree realignment_token = NULL_TREE;
7262 gphi *phi = NULL;
7263 vec<tree> dr_chain = vNULL;
7264 bool grouped_load = false;
7265 gimple *first_stmt;
7266 gimple *first_stmt_for_drptr = NULL;
7267 bool inv_p;
7268 bool compute_in_loop = false;
7269 struct loop *at_loop;
7270 int vec_num;
7271 bool slp = (slp_node != NULL);
7272 bool slp_perm = false;
7273 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7274 poly_uint64 vf;
7275 tree aggr_type;
7276 gather_scatter_info gs_info;
7277 vec_info *vinfo = stmt_info->vinfo;
7278 tree ref_type;
7279 enum vect_def_type mask_dt = vect_unknown_def_type;
7281 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7282 return false;
7284 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7285 && ! vec_stmt)
7286 return false;
7288 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7289 if (is_gimple_assign (stmt))
7291 scalar_dest = gimple_assign_lhs (stmt);
7292 if (TREE_CODE (scalar_dest) != SSA_NAME)
7293 return false;
7295 tree_code code = gimple_assign_rhs_code (stmt);
7296 if (code != ARRAY_REF
7297 && code != BIT_FIELD_REF
7298 && code != INDIRECT_REF
7299 && code != COMPONENT_REF
7300 && code != IMAGPART_EXPR
7301 && code != REALPART_EXPR
7302 && code != MEM_REF
7303 && TREE_CODE_CLASS (code) != tcc_declaration)
7304 return false;
7306 else
7308 gcall *call = dyn_cast <gcall *> (stmt);
7309 if (!call || !gimple_call_internal_p (call))
7310 return false;
7312 internal_fn ifn = gimple_call_internal_fn (call);
7313 if (!internal_load_fn_p (ifn))
7314 return false;
7316 scalar_dest = gimple_call_lhs (call);
7317 if (!scalar_dest)
7318 return false;
7320 if (slp_node != NULL)
7322 if (dump_enabled_p ())
7323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7324 "SLP of masked loads not supported.\n");
7325 return false;
7328 int mask_index = internal_fn_mask_index (ifn);
7329 if (mask_index >= 0)
7331 mask = gimple_call_arg (call, mask_index);
7332 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7333 &mask_vectype))
7334 return false;
7338 if (!STMT_VINFO_DATA_REF (stmt_info))
7339 return false;
7341 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7342 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7344 if (loop_vinfo)
7346 loop = LOOP_VINFO_LOOP (loop_vinfo);
7347 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7348 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7350 else
7351 vf = 1;
7353 /* Multiple types in SLP are handled by creating the appropriate number of
7354 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7355 case of SLP. */
7356 if (slp)
7357 ncopies = 1;
7358 else
7359 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7361 gcc_assert (ncopies >= 1);
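/* For example (an illustrative sketch, not target-specific): with a
   vectorization factor of 8 and a four-element vectype such as V4SI,
   vect_get_num_copies returns ncopies = 8 / 4 = 2, i.e. each scalar
   load is replaced by two vector loads chained together through
   STMT_VINFO_RELATED_STMT as described further below.  */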
7363 /* FORNOW. This restriction should be relaxed. */
7364 if (nested_in_vect_loop && ncopies > 1)
7366 if (dump_enabled_p ())
7367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7368 "multiple types in nested loop.\n");
7369 return false;
7372 /* Invalidate assumptions made by dependence analysis when vectorization
7373 on the unrolled body effectively re-orders stmts. */
7374 if (ncopies > 1
7375 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7376 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7377 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7379 if (dump_enabled_p ())
7380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7381 "cannot perform implicit CSE when unrolling "
7382 "with negative dependence distance\n");
7383 return false;
7386 elem_type = TREE_TYPE (vectype);
7387 mode = TYPE_MODE (vectype);
7389 /* FORNOW. In some cases we can vectorize even if the data-type is not
7390 supported (e.g. data copies). */
7391 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7393 if (dump_enabled_p ())
7394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7395 "Aligned load, but unsupported type.\n");
7396 return false;
7399 /* Check if the load is a part of an interleaving chain. */
7400 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7402 grouped_load = true;
7403 /* FORNOW */
7404 gcc_assert (!nested_in_vect_loop);
7405 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7407 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7408 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7410 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7411 slp_perm = true;
7413 /* Invalidate assumptions made by dependence analysis when vectorization
7414 on the unrolled body effectively re-orders stmts. */
7415 if (!PURE_SLP_STMT (stmt_info)
7416 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7417 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7418 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7420 if (dump_enabled_p ())
7421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7422 "cannot perform implicit CSE when performing "
7423 "group loads with negative dependence distance\n");
7424 return false;
7427 /* Similarly when the stmt is a load that is both part of a SLP
7428 instance and a loop vectorized stmt via the same-dr mechanism
7429 we have to give up. */
7430 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7431 && (STMT_SLP_TYPE (stmt_info)
7432 != STMT_SLP_TYPE (vinfo_for_stmt
7433 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7435 if (dump_enabled_p ())
7436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7437 "conflicting SLP types for CSEd load\n");
7438 return false;
7441 else
7442 group_size = 1;
7444 vect_memory_access_type memory_access_type;
7445 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7446 &memory_access_type, &gs_info))
7447 return false;
7449 if (mask)
7451 if (memory_access_type == VMAT_CONTIGUOUS)
7453 machine_mode vec_mode = TYPE_MODE (vectype);
7454 if (!VECTOR_MODE_P (vec_mode)
7455 || !can_vec_mask_load_store_p (vec_mode,
7456 TYPE_MODE (mask_vectype), true))
7457 return false;
7459 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7461 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7462 tree masktype
7463 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7464 if (TREE_CODE (masktype) == INTEGER_TYPE)
7466 if (dump_enabled_p ())
7467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7468 "masked gather with integer mask not"
7469 " supported.");
7470 return false;
7473 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7474 && memory_access_type != VMAT_GATHER_SCATTER)
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478 "unsupported access type for masked load.\n");
7479 return false;
7483 if (!vec_stmt) /* transformation not required. */
7485 if (!slp)
7486 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7488 if (loop_vinfo
7489 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7490 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7491 memory_access_type, &gs_info);
7493 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7494 /* The SLP costs are calculated during SLP analysis. */
7495 if (! slp_node)
7496 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7497 NULL, NULL, NULL);
7498 return true;
7501 if (!slp)
7502 gcc_assert (memory_access_type
7503 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7505 if (dump_enabled_p ())
7506 dump_printf_loc (MSG_NOTE, vect_location,
7507 "transform load. ncopies = %d\n", ncopies);
7509 /* Transform. */
7511 ensure_base_align (dr);
7513 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7515 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7516 mask_dt);
7517 return true;
7520 if (memory_access_type == VMAT_ELEMENTWISE
7521 || memory_access_type == VMAT_STRIDED_SLP)
7523 gimple_stmt_iterator incr_gsi;
7524 bool insert_after;
7525 gimple *incr;
7526 tree offvar;
7527 tree ivstep;
7528 tree running_off;
7529 vec<constructor_elt, va_gc> *v = NULL;
7530 tree stride_base, stride_step, alias_off;
7531 /* Checked by get_load_store_type. */
7532 unsigned int const_nunits = nunits.to_constant ();
7533 unsigned HOST_WIDE_INT cst_offset = 0;
7535 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7536 gcc_assert (!nested_in_vect_loop);
7538 if (grouped_load)
7540 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7541 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7543 else
7545 first_stmt = stmt;
7546 first_dr = dr;
7548 if (slp && grouped_load)
7550 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7551 ref_type = get_group_alias_ptr_type (first_stmt);
7553 else
7555 if (grouped_load)
7556 cst_offset
7557 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7558 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7559 group_size = 1;
7560 ref_type = reference_alias_ptr_type (DR_REF (dr));
7563 stride_base
7564 = fold_build_pointer_plus
7565 (DR_BASE_ADDRESS (first_dr),
7566 size_binop (PLUS_EXPR,
7567 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7568 convert_to_ptrofftype (DR_INIT (first_dr))));
7569 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7571 /* For a load with loop-invariant (but other than power-of-2)
7572 stride (i.e. not a grouped access) like so:
7574 for (i = 0; i < n; i += stride)
7575 ... = array[i];
7577 we generate a new induction variable and new accesses to
7578 form a new vector (or vectors, depending on ncopies):
7580 for (j = 0; ; j += VF*stride)
7581 tmp1 = array[j];
7582 tmp2 = array[j + stride];
7584 vectemp = {tmp1, tmp2, ...}
7587 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7588 build_int_cst (TREE_TYPE (stride_step), vf));
7590 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7592 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7593 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7594 create_iv (stride_base, ivstep, NULL,
7595 loop, &incr_gsi, insert_after,
7596 &offvar, NULL);
7597 incr = gsi_stmt (incr_gsi);
7598 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7600 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7602 prev_stmt_info = NULL;
7603 running_off = offvar;
7604 alias_off = build_int_cst (ref_type, 0);
7605 int nloads = const_nunits;
7606 int lnel = 1;
7607 tree ltype = TREE_TYPE (vectype);
7608 tree lvectype = vectype;
7609 auto_vec<tree> dr_chain;
7610 if (memory_access_type == VMAT_STRIDED_SLP)
7612 if (group_size < const_nunits)
7614 /* First check if vec_init optab supports construction from
7615 vector elts directly. */
7616 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7617 machine_mode vmode;
7618 if (mode_for_vector (elmode, group_size).exists (&vmode)
7619 && VECTOR_MODE_P (vmode)
7620 && targetm.vector_mode_supported_p (vmode)
7621 && (convert_optab_handler (vec_init_optab,
7622 TYPE_MODE (vectype), vmode)
7623 != CODE_FOR_nothing))
7625 nloads = const_nunits / group_size;
7626 lnel = group_size;
7627 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7629 else
7631 /* Otherwise avoid emitting a constructor of vector elements
7632 by performing the loads using an integer type of the same
7633 size, constructing a vector of those and then
7634 re-interpreting it as the original vector type.
7635 This avoids a huge runtime penalty due to the general
7636 inability to perform store forwarding from smaller stores
7637 to a larger load. */
7638 unsigned lsize
7639 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7640 elmode = int_mode_for_size (lsize, 0).require ();
7641 unsigned int lnunits = const_nunits / group_size;
7642 /* If we can't construct such a vector fall back to
7643 element loads of the original vector type. */
7644 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7645 && VECTOR_MODE_P (vmode)
7646 && targetm.vector_mode_supported_p (vmode)
7647 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7648 != CODE_FOR_nothing))
7650 nloads = lnunits;
7651 lnel = group_size;
7652 ltype = build_nonstandard_integer_type (lsize, 1);
7653 lvectype = build_vector_type (ltype, nloads);
7657 else
7659 nloads = 1;
7660 lnel = const_nunits;
7661 ltype = vectype;
7663 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
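/* Sketch of the choices above, assuming a V4SI vectype and
   group_size == 2: if the target can construct a V4SI from two V2SI
   halves, we use nloads = 2, lnel = 2 and ltype = V2SI.  Failing
   that, we try two 64-bit integer loads (lvectype V2DI) and
   view-convert the constructed vector back to V4SI afterwards.  If
   neither is supported, we fall back to four element loads.  */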
7665 if (slp)
7667 /* For SLP permutation support we need to load the whole group,
7668 not only the number of vector stmts the permutation result
7669 fits in. */
7670 if (slp_perm)
7672 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7673 variable VF. */
7674 unsigned int const_vf = vf.to_constant ();
7675 ncopies = CEIL (group_size * const_vf, const_nunits);
7676 dr_chain.create (ncopies);
7678 else
7679 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7681 unsigned int group_el = 0;
7682 unsigned HOST_WIDE_INT
7683 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7684 for (j = 0; j < ncopies; j++)
7686 if (nloads > 1)
7687 vec_alloc (v, nloads);
7688 for (i = 0; i < nloads; i++)
7690 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7691 group_el * elsz + cst_offset);
7692 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7693 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7694 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7695 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7696 if (nloads > 1)
7697 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7698 gimple_assign_lhs (new_stmt));
7700 group_el += lnel;
7701 if (! slp
7702 || group_el == group_size)
7704 tree newoff = copy_ssa_name (running_off);
7705 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7706 running_off, stride_step);
7707 vect_finish_stmt_generation (stmt, incr, gsi);
7709 running_off = newoff;
7710 group_el = 0;
7713 if (nloads > 1)
7715 tree vec_inv = build_constructor (lvectype, v);
7716 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7717 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7718 if (lvectype != vectype)
7720 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7721 VIEW_CONVERT_EXPR,
7722 build1 (VIEW_CONVERT_EXPR,
7723 vectype, new_temp));
7724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7728 if (slp)
7730 if (slp_perm)
7731 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7732 else
7733 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7735 else
7737 if (j == 0)
7738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7739 else
7740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7741 prev_stmt_info = vinfo_for_stmt (new_stmt);
7744 if (slp_perm)
7746 unsigned n_perms;
7747 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7748 slp_node_instance, false, &n_perms);
7750 return true;
7753 if (memory_access_type == VMAT_GATHER_SCATTER
7754 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7755 grouped_load = false;
7757 if (grouped_load)
7759 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7760 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7761 /* For SLP vectorization we directly vectorize a subchain
7762 without permutation. */
7763 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7764 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7765 /* For BB vectorization always use the first stmt to base
7766 the data ref pointer on. */
7767 if (bb_vinfo)
7768 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7770 /* Check if the chain of loads is already vectorized. */
7771 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7772 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7773 ??? But we can only do so if there is exactly one
7774 as we have no way to get at the rest. Leave the CSE
7775 opportunity alone.
7776 ??? With the group load eventually participating
7777 in multiple different permutations (having multiple
7778 slp nodes which refer to the same group) the CSE
7779 would even produce wrong code. See PR56270. */
7780 && !slp)
7782 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7783 return true;
7785 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7786 group_gap_adj = 0;
7788 /* VEC_NUM is the number of vect stmts to be created for this group. */
7789 if (slp)
7791 grouped_load = false;
7792 /* For SLP permutation support we need to load the whole group,
7793 not only the number of vector stmts the permutation result
7794 fits in. */
7795 if (slp_perm)
7797 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7798 variable VF. */
7799 unsigned int const_vf = vf.to_constant ();
7800 unsigned int const_nunits = nunits.to_constant ();
7801 vec_num = CEIL (group_size * const_vf, const_nunits);
7802 group_gap_adj = vf * group_size - nunits * vec_num;
7804 else
7806 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7807 group_gap_adj
7808 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7811 else
7812 vec_num = group_size;
7814 ref_type = get_group_alias_ptr_type (first_stmt);
7816 else
7818 first_stmt = stmt;
7819 first_dr = dr;
7820 group_size = vec_num = 1;
7821 group_gap_adj = 0;
7822 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7825 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7826 gcc_assert (alignment_support_scheme);
7827 vec_loop_masks *loop_masks
7828 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7829 ? &LOOP_VINFO_MASKS (loop_vinfo)
7830 : NULL);
7831 /* Targets with load-lane instructions must not require explicit
7832 realignment. vect_supportable_dr_alignment always returns either
7833 dr_aligned or dr_unaligned_supported for masked operations. */
7834 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7835 && !mask
7836 && !loop_masks)
7837 || alignment_support_scheme == dr_aligned
7838 || alignment_support_scheme == dr_unaligned_supported);
7840 /* In case the vectorization factor (VF) is bigger than the number
7841 of elements that we can fit in a vectype (nunits), we have to generate
7842 more than one vector stmt - i.e., we need to "unroll" the
7843 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7844 from one copy of the vector stmt to the next, in the field
7845 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7846 stages to find the correct vector defs to be used when vectorizing
7847 stmts that use the defs of the current stmt. The example below
7848 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7849 need to create 4 vectorized stmts):
7851 before vectorization:
7852 RELATED_STMT VEC_STMT
7853 S1: x = memref - -
7854 S2: z = x + 1 - -
7856 step 1: vectorize stmt S1:
7857 We first create the vector stmt VS1_0, and, as usual, record a
7858 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7859 Next, we create the vector stmt VS1_1, and record a pointer to
7860 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7861 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7862 stmts and pointers:
7863 RELATED_STMT VEC_STMT
7864 VS1_0: vx0 = memref0 VS1_1 -
7865 VS1_1: vx1 = memref1 VS1_2 -
7866 VS1_2: vx2 = memref2 VS1_3 -
7867 VS1_3: vx3 = memref3 - -
7868 S1: x = memref - VS1_0
7869 S2: z = x + 1 - -
7871 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7872 information recorded in the RELATED_STMT field is used to vectorize
7873 stmt S2. */
7875 /* In case of interleaving (non-unit grouped access):
7877 S1: x2 = &base + 2
7878 S2: x0 = &base
7879 S3: x1 = &base + 1
7880 S4: x3 = &base + 3
7882 Vectorized loads are created in the order of memory accesses
7883 starting from the access of the first stmt of the chain:
7885 VS1: vx0 = &base
7886 VS2: vx1 = &base + vec_size*1
7887 VS3: vx2 = &base + vec_size*2
7888 VS4: vx3 = &base + vec_size*3
7890 Then permutation statements are generated:
7892 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7893 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7896 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7897 (the order of the data-refs in the output of vect_permute_load_chain
7898 corresponds to the order of scalar stmts in the interleaving chain - see
7899 the documentation of vect_permute_load_chain()).
7900 The generation of permutation stmts and recording them in
7901 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7903 In case of both multiple types and interleaving, the vector loads and
7904 permutation stmts above are created for every copy. The result vector
7905 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7906 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7908 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7909 on a target that supports unaligned accesses (dr_unaligned_supported)
7910 we generate the following code:
7911 p = initial_addr;
7912 indx = 0;
7913 loop {
7914 p = p + indx * vectype_size;
7915 vec_dest = *(p);
7916 indx = indx + 1;
7919 Otherwise, the data reference is potentially unaligned on a target that
7920 does not support unaligned accesses (dr_explicit_realign_optimized) -
7921 then generate the following code, in which the data in each iteration is
7922 obtained by two vector loads, one from the previous iteration, and one
7923 from the current iteration:
7924 p1 = initial_addr;
7925 msq_init = *(floor(p1))
7926 p2 = initial_addr + VS - 1;
7927 realignment_token = call target_builtin;
7928 indx = 0;
7929 loop {
7930 p2 = p2 + indx * vectype_size
7931 lsq = *(floor(p2))
7932 vec_dest = realign_load (msq, lsq, realignment_token)
7933 indx = indx + 1;
7934 msq = lsq;
7935 } */
7937 /* If the misalignment remains the same throughout the execution of the
7938 loop, we can create the init_addr and permutation mask at the loop
7939 preheader. Otherwise, it needs to be created inside the loop.
7940 This can only occur when vectorizing memory accesses in the inner-loop
7941 nested within an outer-loop that is being vectorized. */
7943 if (nested_in_vect_loop
7944 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7945 GET_MODE_SIZE (TYPE_MODE (vectype))))
7947 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7948 compute_in_loop = true;
7951 if ((alignment_support_scheme == dr_explicit_realign_optimized
7952 || alignment_support_scheme == dr_explicit_realign)
7953 && !compute_in_loop)
7955 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7956 alignment_support_scheme, NULL_TREE,
7957 &at_loop);
7958 if (alignment_support_scheme == dr_explicit_realign_optimized)
7960 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7961 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7962 size_one_node);
7965 else
7966 at_loop = loop;
7968 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7969 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7971 tree bump;
7972 tree vec_offset = NULL_TREE;
7973 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7975 aggr_type = NULL_TREE;
7976 bump = NULL_TREE;
7978 else if (memory_access_type == VMAT_GATHER_SCATTER)
7980 aggr_type = elem_type;
7981 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7982 &bump, &vec_offset);
7984 else
7986 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7987 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7988 else
7989 aggr_type = vectype;
7990 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
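/* The increment is essentially the byte size of AGGR_TYPE (or its
   negation when the access runs backwards).  E.g. a two-statement
   interleaving group of V4SI loads using LOAD_LANES makes AGGR_TYPE
   an eight-element array of ints bumped by 32 bytes per copy, while
   a plain contiguous V4SI load is bumped by the 16-byte vector
   size.  */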
7993 tree vec_mask = NULL_TREE;
7994 prev_stmt_info = NULL;
7995 poly_uint64 group_elt = 0;
7996 for (j = 0; j < ncopies; j++)
7998 /* 1. Create the vector or array pointer update chain. */
7999 if (j == 0)
8001 bool simd_lane_access_p
8002 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8003 if (simd_lane_access_p
8004 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8005 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8006 && integer_zerop (DR_OFFSET (first_dr))
8007 && integer_zerop (DR_INIT (first_dr))
8008 && alias_sets_conflict_p (get_alias_set (aggr_type),
8009 get_alias_set (TREE_TYPE (ref_type)))
8010 && (alignment_support_scheme == dr_aligned
8011 || alignment_support_scheme == dr_unaligned_supported))
8013 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8014 dataref_offset = build_int_cst (ref_type, 0);
8015 inv_p = false;
8017 else if (first_stmt_for_drptr
8018 && first_stmt != first_stmt_for_drptr)
8020 dataref_ptr
8021 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8022 at_loop, offset, &dummy, gsi,
8023 &ptr_incr, simd_lane_access_p,
8024 &inv_p, byte_offset, bump);
8025 /* Adjust the pointer by the difference to first_stmt. */
8026 data_reference_p ptrdr
8027 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8028 tree diff = fold_convert (sizetype,
8029 size_binop (MINUS_EXPR,
8030 DR_INIT (first_dr),
8031 DR_INIT (ptrdr)));
8032 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8033 stmt, diff);
8035 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8037 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8038 &dataref_ptr, &vec_offset);
8039 inv_p = false;
8041 else
8042 dataref_ptr
8043 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8044 offset, &dummy, gsi, &ptr_incr,
8045 simd_lane_access_p, &inv_p,
8046 byte_offset, bump);
8047 if (mask)
8048 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8049 mask_vectype);
8051 else
8053 if (dataref_offset)
8054 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8055 bump);
8056 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8057 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8058 vec_offset);
8059 else
8060 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8061 stmt, bump);
8062 if (mask)
8063 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8066 if (grouped_load || slp_perm)
8067 dr_chain.create (vec_num);
8069 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8071 tree vec_array;
8073 vec_array = create_vector_array (vectype, vec_num);
8075 tree final_mask = NULL_TREE;
8076 if (loop_masks)
8077 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8078 vectype, j);
8079 if (vec_mask)
8080 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8081 vec_mask, gsi);
8083 gcall *call;
8084 if (final_mask)
8086 /* Emit:
8087 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8088 VEC_MASK). */
8089 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8090 tree alias_ptr = build_int_cst (ref_type, align);
8091 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8092 dataref_ptr, alias_ptr,
8093 final_mask);
8095 else
8097 /* Emit:
8098 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8099 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8100 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8102 gimple_call_set_lhs (call, vec_array);
8103 gimple_call_set_nothrow (call, true);
8104 new_stmt = call;
8105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8107 /* Extract each vector into an SSA_NAME. */
8108 for (i = 0; i < vec_num; i++)
8110 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8111 vec_array, i);
8112 dr_chain.quick_push (new_temp);
8115 /* Record the mapping between SSA_NAMEs and statements. */
8116 vect_record_grouped_load_vectors (stmt, dr_chain);
8118 /* Record that VEC_ARRAY is now dead. */
8119 vect_clobber_variable (stmt, gsi, vec_array);
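/* Roughly, for a group of two V4SI loads this branch emits
     vect_array = LOAD_LANES (MEM_REF <dataref_ptr>);
     vx0 = vect_array[0];
     vx1 = vect_array[1];
     vect_array ={v} {CLOBBER};
   (SSA names illustrative), with vx0/vx1 recorded as the vector
   defs of the group members.  */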
8121 else
8123 for (i = 0; i < vec_num; i++)
8125 tree final_mask = NULL_TREE;
8126 if (loop_masks
8127 && memory_access_type != VMAT_INVARIANT)
8128 final_mask = vect_get_loop_mask (gsi, loop_masks,
8129 vec_num * ncopies,
8130 vectype, vec_num * j + i);
8131 if (vec_mask)
8132 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8133 vec_mask, gsi);
8135 if (i > 0)
8136 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8137 stmt, bump);
8139 /* 2. Create the vector-load in the loop. */
8140 switch (alignment_support_scheme)
8142 case dr_aligned:
8143 case dr_unaligned_supported:
8145 unsigned int align, misalign;
8147 if (memory_access_type == VMAT_GATHER_SCATTER)
8149 tree scale = size_int (gs_info.scale);
8150 gcall *call;
8151 if (loop_masks)
8152 call = gimple_build_call_internal
8153 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8154 vec_offset, scale, final_mask);
8155 else
8156 call = gimple_build_call_internal
8157 (IFN_GATHER_LOAD, 3, dataref_ptr,
8158 vec_offset, scale);
8159 gimple_call_set_nothrow (call, true);
8160 new_stmt = call;
8161 data_ref = NULL_TREE;
8162 break;
8165 align = DR_TARGET_ALIGNMENT (dr);
8166 if (alignment_support_scheme == dr_aligned)
8168 gcc_assert (aligned_access_p (first_dr));
8169 misalign = 0;
8171 else if (DR_MISALIGNMENT (first_dr) == -1)
8173 align = dr_alignment (vect_dr_behavior (first_dr));
8174 misalign = 0;
8176 else
8177 misalign = DR_MISALIGNMENT (first_dr);
8178 if (dataref_offset == NULL_TREE
8179 && TREE_CODE (dataref_ptr) == SSA_NAME)
8180 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8181 align, misalign);
8183 if (final_mask)
8185 align = least_bit_hwi (misalign | align);
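/* E.g. with a 16-byte target alignment but misalign == 4 this is
   least_bit_hwi (4 | 16) == 4, so the pointer argument of the
   IFN_MASK_LOAD below only claims 4-byte alignment.  */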
8186 tree ptr = build_int_cst (ref_type, align);
8187 gcall *call
8188 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8189 dataref_ptr, ptr,
8190 final_mask);
8191 gimple_call_set_nothrow (call, true);
8192 new_stmt = call;
8193 data_ref = NULL_TREE;
8195 else
8197 data_ref
8198 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8199 dataref_offset
8200 ? dataref_offset
8201 : build_int_cst (ref_type, 0));
8202 if (alignment_support_scheme == dr_aligned)
8204 else if (DR_MISALIGNMENT (first_dr) == -1)
8205 TREE_TYPE (data_ref)
8206 = build_aligned_type (TREE_TYPE (data_ref),
8207 align * BITS_PER_UNIT);
8208 else
8209 TREE_TYPE (data_ref)
8210 = build_aligned_type (TREE_TYPE (data_ref),
8211 TYPE_ALIGN (elem_type));
8213 break;
8215 case dr_explicit_realign:
8217 tree ptr, bump;
8219 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8221 if (compute_in_loop)
8222 msq = vect_setup_realignment (first_stmt, gsi,
8223 &realignment_token,
8224 dr_explicit_realign,
8225 dataref_ptr, NULL);
8227 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8228 ptr = copy_ssa_name (dataref_ptr);
8229 else
8230 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8231 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8232 new_stmt = gimple_build_assign
8233 (ptr, BIT_AND_EXPR, dataref_ptr,
8234 build_int_cst
8235 (TREE_TYPE (dataref_ptr),
8236 -(HOST_WIDE_INT) align));
8237 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8238 data_ref
8239 = build2 (MEM_REF, vectype, ptr,
8240 build_int_cst (ref_type, 0));
8241 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8242 vec_dest = vect_create_destination_var (scalar_dest,
8243 vectype);
8244 new_stmt = gimple_build_assign (vec_dest, data_ref);
8245 new_temp = make_ssa_name (vec_dest, new_stmt);
8246 gimple_assign_set_lhs (new_stmt, new_temp);
8247 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8248 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8249 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8250 msq = new_temp;
8252 bump = size_binop (MULT_EXPR, vs,
8253 TYPE_SIZE_UNIT (elem_type));
8254 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8255 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8256 new_stmt = gimple_build_assign
8257 (NULL_TREE, BIT_AND_EXPR, ptr,
8258 build_int_cst
8259 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8260 ptr = copy_ssa_name (ptr, new_stmt);
8261 gimple_assign_set_lhs (new_stmt, ptr);
8262 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8263 data_ref
8264 = build2 (MEM_REF, vectype, ptr,
8265 build_int_cst (ref_type, 0));
8266 break;
8268 case dr_explicit_realign_optimized:
8270 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8271 new_temp = copy_ssa_name (dataref_ptr);
8272 else
8273 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8274 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8275 new_stmt = gimple_build_assign
8276 (new_temp, BIT_AND_EXPR, dataref_ptr,
8277 build_int_cst (TREE_TYPE (dataref_ptr),
8278 -(HOST_WIDE_INT) align));
8279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8280 data_ref
8281 = build2 (MEM_REF, vectype, new_temp,
8282 build_int_cst (ref_type, 0));
8283 break;
8285 default:
8286 gcc_unreachable ();
8288 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8289 /* DATA_REF is null if we've already built the statement. */
8290 if (data_ref)
8292 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8293 new_stmt = gimple_build_assign (vec_dest, data_ref);
8295 new_temp = make_ssa_name (vec_dest, new_stmt);
8296 gimple_set_lhs (new_stmt, new_temp);
8297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8299 /* 3. Handle explicit realignment if necessary/supported.
8300 Create in loop:
8301 vec_dest = realign_load (msq, lsq, realignment_token) */
8302 if (alignment_support_scheme == dr_explicit_realign_optimized
8303 || alignment_support_scheme == dr_explicit_realign)
8305 lsq = gimple_assign_lhs (new_stmt);
8306 if (!realignment_token)
8307 realignment_token = dataref_ptr;
8308 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8309 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8310 msq, lsq, realignment_token);
8311 new_temp = make_ssa_name (vec_dest, new_stmt);
8312 gimple_assign_set_lhs (new_stmt, new_temp);
8313 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8315 if (alignment_support_scheme == dr_explicit_realign_optimized)
8317 gcc_assert (phi);
8318 if (i == vec_num - 1 && j == ncopies - 1)
8319 add_phi_arg (phi, lsq,
8320 loop_latch_edge (containing_loop),
8321 UNKNOWN_LOCATION);
8322 msq = lsq;
8326 /* 4. Handle invariant-load. */
8327 if (inv_p && !bb_vinfo)
8329 gcc_assert (!grouped_load);
8330 /* If we have versioned for aliasing or the loop doesn't
8331 have any data dependencies that would preclude this,
8332 then we are sure this is a loop invariant load and
8333 thus we can insert it on the preheader edge. */
8334 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8335 && !nested_in_vect_loop
8336 && hoist_defs_of_uses (stmt, loop))
8338 if (dump_enabled_p ())
8340 dump_printf_loc (MSG_NOTE, vect_location,
8341 "hoisting out of the vectorized "
8342 "loop: ");
8343 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8345 tree tem = copy_ssa_name (scalar_dest);
8346 gsi_insert_on_edge_immediate
8347 (loop_preheader_edge (loop),
8348 gimple_build_assign (tem,
8349 unshare_expr
8350 (gimple_assign_rhs1 (stmt))));
8351 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8352 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8353 set_vinfo_for_stmt (new_stmt,
8354 new_stmt_vec_info (new_stmt, vinfo));
8356 else
8358 gimple_stmt_iterator gsi2 = *gsi;
8359 gsi_next (&gsi2);
8360 new_temp = vect_init_vector (stmt, scalar_dest,
8361 vectype, &gsi2);
8362 new_stmt = SSA_NAME_DEF_STMT (new_temp);
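/* Either way the invariant scalar value is loaded once and splatted,
   e.g. for x = *p with a V4SI vectype (illustrative SSA names):
     tem_1 = *p;
     vect_cst_2 = {tem_1, tem_1, tem_1, tem_1};
   When the load can be hoisted both stmts go on the preheader edge;
   otherwise the splat is built from the scalar result right after
   the original load.  */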
8366 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8368 tree perm_mask = perm_mask_for_reverse (vectype);
8369 new_temp = permute_vec_elements (new_temp, new_temp,
8370 perm_mask, stmt, gsi);
8371 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8374 /* Collect vector loads and later create their permutation in
8375 vect_transform_grouped_load (). */
8376 if (grouped_load || slp_perm)
8377 dr_chain.quick_push (new_temp);
8379 /* Store vector loads in the corresponding SLP_NODE. */
8380 if (slp && !slp_perm)
8381 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8383 /* With SLP permutation we load the gaps as well; without it
8384 we need to skip the gaps after we manage to fully load
8385 all elements. group_gap_adj is GROUP_SIZE here. */
8386 group_elt += nunits;
8387 if (maybe_ne (group_gap_adj, 0U)
8388 && !slp_perm
8389 && known_eq (group_elt, group_size - group_gap_adj))
8391 poly_wide_int bump_val
8392 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8393 * group_gap_adj);
8394 tree bump = wide_int_to_tree (sizetype, bump_val);
8395 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8396 stmt, bump);
8397 group_elt = 0;
8400 /* Bump the vector pointer to account for a gap or for excess
8401 elements loaded for a permuted SLP load. */
8402 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8404 poly_wide_int bump_val
8405 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8406 * group_gap_adj);
8407 tree bump = wide_int_to_tree (sizetype, bump_val);
8408 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8409 stmt, bump);
8413 if (slp && !slp_perm)
8414 continue;
8416 if (slp_perm)
8418 unsigned n_perms;
8419 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8420 slp_node_instance, false,
8421 &n_perms))
8423 dr_chain.release ();
8424 return false;
8427 else
8429 if (grouped_load)
8431 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8432 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8433 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8435 else
8437 if (j == 0)
8438 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8439 else
8440 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8441 prev_stmt_info = vinfo_for_stmt (new_stmt);
8444 dr_chain.release ();
8447 return true;
8450 /* Function vect_is_simple_cond.
8452 Input:
8453 VINFO - the vect info of the loop or basic block that is being vectorized.
8454 COND - Condition that is checked for simple use.
8456 Output:
8457 *COMP_VECTYPE - the vector type for the comparison.
8458 *DTS - The def types for the arguments of the comparison.
8460 Returns whether COND can be vectorized. Checks whether
8461 condition operands are supportable using vect_is_simple_use.
8463 static bool
8464 vect_is_simple_cond (tree cond, vec_info *vinfo,
8465 tree *comp_vectype, enum vect_def_type *dts,
8466 tree vectype)
8468 tree lhs, rhs;
8469 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8471 /* Mask case. */
8472 if (TREE_CODE (cond) == SSA_NAME
8473 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8475 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8476 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8477 &dts[0], comp_vectype)
8478 || !*comp_vectype
8479 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8480 return false;
8481 return true;
8484 if (!COMPARISON_CLASS_P (cond))
8485 return false;
8487 lhs = TREE_OPERAND (cond, 0);
8488 rhs = TREE_OPERAND (cond, 1);
8490 if (TREE_CODE (lhs) == SSA_NAME)
8492 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8493 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8494 return false;
8496 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8497 || TREE_CODE (lhs) == FIXED_CST)
8498 dts[0] = vect_constant_def;
8499 else
8500 return false;
8502 if (TREE_CODE (rhs) == SSA_NAME)
8504 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8505 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8506 return false;
8508 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8509 || TREE_CODE (rhs) == FIXED_CST)
8510 dts[1] = vect_constant_def;
8511 else
8512 return false;
8514 if (vectype1 && vectype2
8515 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8516 TYPE_VECTOR_SUBPARTS (vectype2)))
8517 return false;
8519 *comp_vectype = vectype1 ? vectype1 : vectype2;
8520 /* Invariant comparison. */
8521 if (! *comp_vectype)
8523 tree scalar_type = TREE_TYPE (lhs);
8524 /* If we can widen the comparison to match vectype do so. */
8525 if (INTEGRAL_TYPE_P (scalar_type)
8526 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8527 TYPE_SIZE (TREE_TYPE (vectype))))
8528 scalar_type = build_nonstandard_integer_type
8529 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8530 TYPE_UNSIGNED (scalar_type));
8531 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
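/* For instance, if VECTYPE is V2DI but the invariant comparison is
   between two 32-bit integers, the comparison type is widened to a
   64-bit integer of the same signedness, so the comparison vector
   ends up with the same number of elements as VECTYPE.  */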
8534 return true;
8537 /* vectorizable_condition.
8539 Check if STMT is a conditional modify expression that can be vectorized.
8540 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8541 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8542 at GSI.
8544 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8545 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8546 else clause if it is 2).
8548 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8550 bool
8551 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8552 gimple **vec_stmt, tree reduc_def, int reduc_index,
8553 slp_tree slp_node)
8555 tree scalar_dest = NULL_TREE;
8556 tree vec_dest = NULL_TREE;
8557 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8558 tree then_clause, else_clause;
8559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8560 tree comp_vectype = NULL_TREE;
8561 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8562 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8563 tree vec_compare;
8564 tree new_temp;
8565 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8566 enum vect_def_type dts[4]
8567 = {vect_unknown_def_type, vect_unknown_def_type,
8568 vect_unknown_def_type, vect_unknown_def_type};
8569 int ndts = 4;
8570 int ncopies;
8571 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8572 stmt_vec_info prev_stmt_info = NULL;
8573 int i, j;
8574 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8575 vec<tree> vec_oprnds0 = vNULL;
8576 vec<tree> vec_oprnds1 = vNULL;
8577 vec<tree> vec_oprnds2 = vNULL;
8578 vec<tree> vec_oprnds3 = vNULL;
8579 tree vec_cmp_type;
8580 bool masked = false;
8582 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8583 return false;
8585 vect_reduction_type reduction_type
8586 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8587 if (reduction_type == TREE_CODE_REDUCTION)
8589 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8590 return false;
8592 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8593 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8594 && reduc_def))
8595 return false;
8597 /* FORNOW: not yet supported. */
8598 if (STMT_VINFO_LIVE_P (stmt_info))
8600 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8602 "value used after loop.\n");
8603 return false;
8607 /* Is vectorizable conditional operation? */
8608 if (!is_gimple_assign (stmt))
8609 return false;
8611 code = gimple_assign_rhs_code (stmt);
8613 if (code != COND_EXPR)
8614 return false;
8616 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8617 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8619 if (slp_node)
8620 ncopies = 1;
8621 else
8622 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8624 gcc_assert (ncopies >= 1);
8625 if (reduc_index && ncopies > 1)
8626 return false; /* FORNOW */
8628 cond_expr = gimple_assign_rhs1 (stmt);
8629 then_clause = gimple_assign_rhs2 (stmt);
8630 else_clause = gimple_assign_rhs3 (stmt);
8632 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8633 &comp_vectype, &dts[0], vectype)
8634 || !comp_vectype)
8635 return false;
8637 gimple *def_stmt;
8638 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8639 &vectype1))
8640 return false;
8641 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8642 &vectype2))
8643 return false;
8645 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8646 return false;
8648 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8649 return false;
8651 masked = !COMPARISON_CLASS_P (cond_expr);
8652 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8654 if (vec_cmp_type == NULL_TREE)
8655 return false;
8657 cond_code = TREE_CODE (cond_expr);
8658 if (!masked)
8660 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8661 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8664 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8666 /* Boolean values may have another representation in vectors
8667 and therefore we prefer bit operations over comparison for
8668 them (which also works for scalar masks). We store opcodes
8669 to use in bitop1 and bitop2. Statement is vectorized as
8670 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8671 depending on bitop1 and bitop2 arity. */
8672 switch (cond_code)
8674 case GT_EXPR:
8675 bitop1 = BIT_NOT_EXPR;
8676 bitop2 = BIT_AND_EXPR;
8677 break;
8678 case GE_EXPR:
8679 bitop1 = BIT_NOT_EXPR;
8680 bitop2 = BIT_IOR_EXPR;
8681 break;
8682 case LT_EXPR:
8683 bitop1 = BIT_NOT_EXPR;
8684 bitop2 = BIT_AND_EXPR;
8685 std::swap (cond_expr0, cond_expr1);
8686 break;
8687 case LE_EXPR:
8688 bitop1 = BIT_NOT_EXPR;
8689 bitop2 = BIT_IOR_EXPR;
8690 std::swap (cond_expr0, cond_expr1);
8691 break;
8692 case NE_EXPR:
8693 bitop1 = BIT_XOR_EXPR;
8694 break;
8695 case EQ_EXPR:
8696 bitop1 = BIT_XOR_EXPR;
8697 bitop2 = BIT_NOT_EXPR;
8698 break;
8699 default:
8700 return false;
8702 cond_code = SSA_NAME;
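/* Sketch of the mapping above on boolean masks: a > b becomes
   a & ~b (bitop1 = BIT_NOT_EXPR on the second operand, bitop2 =
   BIT_AND_EXPR), a >= b becomes a | ~b, a == b becomes ~(a ^ b),
   a != b is just a ^ b, and LT/LE reuse GT/GE with the operands
   swapped.  */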
8705 if (!vec_stmt)
8707 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8708 if (bitop1 != NOP_EXPR)
8710 machine_mode mode = TYPE_MODE (comp_vectype);
8711 optab optab;
8713 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8714 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8715 return false;
8717 if (bitop2 != NOP_EXPR)
8719 optab = optab_for_tree_code (bitop2, comp_vectype,
8720 optab_default);
8721 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8722 return false;
8725 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8726 cond_code))
8728 if (!slp_node)
8729 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8730 return true;
8732 return false;
8735 /* Transform. */
8737 if (!slp_node)
8739 vec_oprnds0.create (1);
8740 vec_oprnds1.create (1);
8741 vec_oprnds2.create (1);
8742 vec_oprnds3.create (1);
8745 /* Handle def. */
8746 scalar_dest = gimple_assign_lhs (stmt);
8747 if (reduction_type != EXTRACT_LAST_REDUCTION)
8748 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8750 /* Handle cond expr. */
8751 for (j = 0; j < ncopies; j++)
8753 gimple *new_stmt = NULL;
8754 if (j == 0)
8756 if (slp_node)
8758 auto_vec<tree, 4> ops;
8759 auto_vec<vec<tree>, 4> vec_defs;
8761 if (masked)
8762 ops.safe_push (cond_expr);
8763 else
8765 ops.safe_push (cond_expr0);
8766 ops.safe_push (cond_expr1);
8768 ops.safe_push (then_clause);
8769 ops.safe_push (else_clause);
8770 vect_get_slp_defs (ops, slp_node, &vec_defs);
8771 vec_oprnds3 = vec_defs.pop ();
8772 vec_oprnds2 = vec_defs.pop ();
8773 if (!masked)
8774 vec_oprnds1 = vec_defs.pop ();
8775 vec_oprnds0 = vec_defs.pop ();
8777 else
8779 gimple *gtemp;
8780 if (masked)
8782 vec_cond_lhs
8783 = vect_get_vec_def_for_operand (cond_expr, stmt,
8784 comp_vectype);
8785 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8786 &gtemp, &dts[0]);
8788 else
8790 vec_cond_lhs
8791 = vect_get_vec_def_for_operand (cond_expr0,
8792 stmt, comp_vectype);
8793 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8795 vec_cond_rhs
8796 = vect_get_vec_def_for_operand (cond_expr1,
8797 stmt, comp_vectype);
8798 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8800 if (reduc_index == 1)
8801 vec_then_clause = reduc_def;
8802 else
8804 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8805 stmt);
8806 vect_is_simple_use (then_clause, loop_vinfo,
8807 &gtemp, &dts[2]);
8809 if (reduc_index == 2)
8810 vec_else_clause = reduc_def;
8811 else
8813 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8814 stmt);
8815 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8819 else
8821 vec_cond_lhs
8822 = vect_get_vec_def_for_stmt_copy (dts[0],
8823 vec_oprnds0.pop ());
8824 if (!masked)
8825 vec_cond_rhs
8826 = vect_get_vec_def_for_stmt_copy (dts[1],
8827 vec_oprnds1.pop ());
8829 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8830 vec_oprnds2.pop ());
8831 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8832 vec_oprnds3.pop ());
8835 if (!slp_node)
8837 vec_oprnds0.quick_push (vec_cond_lhs);
8838 if (!masked)
8839 vec_oprnds1.quick_push (vec_cond_rhs);
8840 vec_oprnds2.quick_push (vec_then_clause);
8841 vec_oprnds3.quick_push (vec_else_clause);
8844 /* Arguments are ready. Create the new vector stmt. */
8845 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8847 vec_then_clause = vec_oprnds2[i];
8848 vec_else_clause = vec_oprnds3[i];
8850 if (masked)
8851 vec_compare = vec_cond_lhs;
8852 else
8854 vec_cond_rhs = vec_oprnds1[i];
8855 if (bitop1 == NOP_EXPR)
8856 vec_compare = build2 (cond_code, vec_cmp_type,
8857 vec_cond_lhs, vec_cond_rhs);
8858 else
8860 new_temp = make_ssa_name (vec_cmp_type);
8861 if (bitop1 == BIT_NOT_EXPR)
8862 new_stmt = gimple_build_assign (new_temp, bitop1,
8863 vec_cond_rhs);
8864 else
8865 new_stmt
8866 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8867 vec_cond_rhs);
8868 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8869 if (bitop2 == NOP_EXPR)
8870 vec_compare = new_temp;
8871 else if (bitop2 == BIT_NOT_EXPR)
8873 /* Instead of doing ~x ? y : z do x ? z : y. */
8874 vec_compare = new_temp;
8875 std::swap (vec_then_clause, vec_else_clause);
8877 else
8879 vec_compare = make_ssa_name (vec_cmp_type);
8880 new_stmt
8881 = gimple_build_assign (vec_compare, bitop2,
8882 vec_cond_lhs, new_temp);
8883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8887 if (reduction_type == EXTRACT_LAST_REDUCTION)
8889 if (!is_gimple_val (vec_compare))
8891 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8892 new_stmt = gimple_build_assign (vec_compare_name,
8893 vec_compare);
8894 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8895 vec_compare = vec_compare_name;
8897 gcc_assert (reduc_index == 2);
8898 new_stmt = gimple_build_call_internal
8899 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8900 vec_then_clause);
8901 gimple_call_set_lhs (new_stmt, scalar_dest);
8902 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8903 if (stmt == gsi_stmt (*gsi))
8904 vect_finish_replace_stmt (stmt, new_stmt);
8905 else
8907 /* In this case we're moving the definition to later in the
8908 block. That doesn't matter because the only uses of the
8909 lhs are in phi statements. */
8910 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8911 gsi_remove (&old_gsi, true);
8912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8915 else
8917 new_temp = make_ssa_name (vec_dest);
8918 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8919 vec_compare, vec_then_clause,
8920 vec_else_clause);
8921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8923 if (slp_node)
8924 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8927 if (slp_node)
8928 continue;
8930 if (j == 0)
8931 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8932 else
8933 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8935 prev_stmt_info = vinfo_for_stmt (new_stmt);
8938 vec_oprnds0.release ();
8939 vec_oprnds1.release ();
8940 vec_oprnds2.release ();
8941 vec_oprnds3.release ();
8943 return true;
8946 /* vectorizable_comparison.
8948 Check if STMT is a comparison expression that can be vectorized.
8949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8950 comparison, put it in VEC_STMT, and insert it at GSI.
8952 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8954 static bool
8955 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8956 gimple **vec_stmt, tree reduc_def,
8957 slp_tree slp_node)
8959 tree lhs, rhs1, rhs2;
8960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8961 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8963 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8964 tree new_temp;
8965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8966 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8967 int ndts = 2;
8968 poly_uint64 nunits;
8969 int ncopies;
8970 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8971 stmt_vec_info prev_stmt_info = NULL;
8972 int i, j;
8973 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8974 vec<tree> vec_oprnds0 = vNULL;
8975 vec<tree> vec_oprnds1 = vNULL;
8976 gimple *def_stmt;
8977 tree mask_type;
8978 tree mask;
8980 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8981 return false;
8983 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8984 return false;
8986 mask_type = vectype;
8987 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8989 if (slp_node)
8990 ncopies = 1;
8991 else
8992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8994 gcc_assert (ncopies >= 1);
8995 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8996 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8997 && reduc_def))
8998 return false;
9000 if (STMT_VINFO_LIVE_P (stmt_info))
9002 if (dump_enabled_p ())
9003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9004 "value used after loop.\n");
9005 return false;
9008 if (!is_gimple_assign (stmt))
9009 return false;
9011 code = gimple_assign_rhs_code (stmt);
9013 if (TREE_CODE_CLASS (code) != tcc_comparison)
9014 return false;
9016 rhs1 = gimple_assign_rhs1 (stmt);
9017 rhs2 = gimple_assign_rhs2 (stmt);
9019 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9020 &dts[0], &vectype1))
9021 return false;
9023 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9024 &dts[1], &vectype2))
9025 return false;
9027 if (vectype1 && vectype2
9028 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9029 TYPE_VECTOR_SUBPARTS (vectype2)))
9030 return false;
9032 vectype = vectype1 ? vectype1 : vectype2;
9034 /* Invariant comparison. */
9035 if (!vectype)
9037 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9038 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9039 return false;
9041 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9042 return false;
9044 /* Can't compare mask and non-mask types. */
9045 if (vectype1 && vectype2
9046 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9047 return false;
9049 /* Boolean values may have another representation in vectors
9050 and therefore we prefer bit operations over comparison for
9051 them (which also works for scalar masks). We store opcodes
9052 to use in bitop1 and bitop2. Statement is vectorized as
9053 BITOP2 (rhs1 BITOP1 rhs2) or
9054 rhs1 BITOP2 (BITOP1 rhs2)
9055 depending on bitop1 and bitop2 arity. */
9056 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9058 if (code == GT_EXPR)
9060 bitop1 = BIT_NOT_EXPR;
9061 bitop2 = BIT_AND_EXPR;
9063 else if (code == GE_EXPR)
9065 bitop1 = BIT_NOT_EXPR;
9066 bitop2 = BIT_IOR_EXPR;
9068 else if (code == LT_EXPR)
9070 bitop1 = BIT_NOT_EXPR;
9071 bitop2 = BIT_AND_EXPR;
9072 std::swap (rhs1, rhs2);
9073 std::swap (dts[0], dts[1]);
9075 else if (code == LE_EXPR)
9077 bitop1 = BIT_NOT_EXPR;
9078 bitop2 = BIT_IOR_EXPR;
9079 std::swap (rhs1, rhs2);
9080 std::swap (dts[0], dts[1]);
9082 else
9084 bitop1 = BIT_XOR_EXPR;
9085 if (code == EQ_EXPR)
9086 bitop2 = BIT_NOT_EXPR;
9090 if (!vec_stmt)
9092 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9093 if (!slp_node)
9094 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9095 dts, ndts, NULL, NULL);
9096 if (bitop1 == NOP_EXPR)
9097 return expand_vec_cmp_expr_p (vectype, mask_type, code);
9098 else
9100 machine_mode mode = TYPE_MODE (vectype);
9101 optab optab;
9103 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9104 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9105 return false;
9107 if (bitop2 != NOP_EXPR)
9109 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9110 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9111 return false;
9113 return true;
9117 /* Transform. */
9118 if (!slp_node)
9120 vec_oprnds0.create (1);
9121 vec_oprnds1.create (1);
9124 /* Handle def. */
9125 lhs = gimple_assign_lhs (stmt);
9126 mask = vect_create_destination_var (lhs, mask_type);
9128 /* Handle cmp expr. */
9129 for (j = 0; j < ncopies; j++)
9131 gassign *new_stmt = NULL;
9132 if (j == 0)
9134 if (slp_node)
9136 auto_vec<tree, 2> ops;
9137 auto_vec<vec<tree>, 2> vec_defs;
9139 ops.safe_push (rhs1);
9140 ops.safe_push (rhs2);
9141 vect_get_slp_defs (ops, slp_node, &vec_defs);
9142 vec_oprnds1 = vec_defs.pop ();
9143 vec_oprnds0 = vec_defs.pop ();
9145 else
9147 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9148 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9151 else
9153 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9154 vec_oprnds0.pop ());
9155 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9156 vec_oprnds1.pop ());
9159 if (!slp_node)
9161 vec_oprnds0.quick_push (vec_rhs1);
9162 vec_oprnds1.quick_push (vec_rhs2);
9165 /* Arguments are ready. Create the new vector stmt. */
9166 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9168 vec_rhs2 = vec_oprnds1[i];
9170 new_temp = make_ssa_name (mask);
9171 if (bitop1 == NOP_EXPR)
9173 new_stmt = gimple_build_assign (new_temp, code,
9174 vec_rhs1, vec_rhs2);
9175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9177 else
9179 if (bitop1 == BIT_NOT_EXPR)
9180 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9181 else
9182 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9183 vec_rhs2);
9184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9185 if (bitop2 != NOP_EXPR)
9187 tree res = make_ssa_name (mask);
9188 if (bitop2 == BIT_NOT_EXPR)
9189 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9190 else
9191 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9192 new_temp);
9193 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9196 if (slp_node)
9197 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9200 if (slp_node)
9201 continue;
9203 if (j == 0)
9204 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9205 else
9206 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9208 prev_stmt_info = vinfo_for_stmt (new_stmt);
9211 vec_oprnds0.release ();
9212 vec_oprnds1.release ();
9214 return true;
9217 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9218 can handle all live statements in the node. Otherwise return true
9219 if STMT is not live or if vectorizable_live_operation can handle it.
9220 GSI and VEC_STMT are as for vectorizable_live_operation. */
9222 static bool
9223 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9224 slp_tree slp_node, gimple **vec_stmt)
9226 if (slp_node)
9228 gimple *slp_stmt;
9229 unsigned int i;
9230 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9232 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9233 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9234 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9235 vec_stmt))
9236 return false;
9239 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9240 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9241 return false;
9243 return true;
9246 /* Make sure the statement is vectorizable. */
9248 bool
9249 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9250 slp_instance node_instance)
9252 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9254 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9255 bool ok;
9256 gimple *pattern_stmt;
9257 gimple_seq pattern_def_seq;
9259 if (dump_enabled_p ())
9261 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9262 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9265 if (gimple_has_volatile_ops (stmt))
9267 if (dump_enabled_p ())
9268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9269 "not vectorized: stmt has volatile operands\n");
9271 return false;
9274 /* Skip stmts that do not need to be vectorized. In loops this is expected
9275 to include:
9276 - the COND_EXPR which is the loop exit condition
9277 - any LABEL_EXPRs in the loop
9278 - computations that are used only for array indexing or loop control.
9279 In basic blocks we only analyze statements that are a part of some SLP
9280 instance; therefore, all the statements are relevant.
9282 A pattern statement needs to be analyzed instead of the original statement
9283 if the original statement is not relevant; otherwise, we analyze both
9284 statements. In basic blocks we are called from some SLP instance
9285 traversal; don't analyze pattern stmts instead, as the pattern stmts
9286 will already be part of an SLP instance. */
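/* For example, in a loop such as "for (i = 0; i < n; i++) a[i] = b[i];"
   the increment of I is used only for array indexing and loop control,
   so it is skipped here rather than vectorized on its own.  */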
9288 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9289 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9290 && !STMT_VINFO_LIVE_P (stmt_info))
9292 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9293 && pattern_stmt
9294 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9295 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9297 /* Analyze PATTERN_STMT instead of the original stmt. */
9298 stmt = pattern_stmt;
9299 stmt_info = vinfo_for_stmt (pattern_stmt);
9300 if (dump_enabled_p ())
9302 dump_printf_loc (MSG_NOTE, vect_location,
9303 "==> examining pattern statement: ");
9304 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9307 else
9309 if (dump_enabled_p ())
9310 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9312 return true;
9315 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9316 && node == NULL
9317 && pattern_stmt
9318 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9321 /* Analyze PATTERN_STMT too. */
9322 if (dump_enabled_p ())
9324 dump_printf_loc (MSG_NOTE, vect_location,
9325 "==> examining pattern statement: ");
9326 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9329 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9330 node_instance))
9331 return false;
9334 if (is_pattern_stmt_p (stmt_info)
9335 && node == NULL
9336 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9338 gimple_stmt_iterator si;
9340 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9342 gimple *pattern_def_stmt = gsi_stmt (si);
9343 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9344 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9346 /* Analyze this def stmt from STMT's pattern def sequence. */
9347 if (dump_enabled_p ())
9349 dump_printf_loc (MSG_NOTE, vect_location,
9350 "==> examining pattern def statement: ");
9351 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9354 if (!vect_analyze_stmt (pattern_def_stmt,
9355 need_to_vectorize, node, node_instance))
9356 return false;
9361 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9363 case vect_internal_def:
9364 break;
9366 case vect_reduction_def:
9367 case vect_nested_cycle:
9368 gcc_assert (!bb_vinfo
9369 && (relevance == vect_used_in_outer
9370 || relevance == vect_used_in_outer_by_reduction
9371 || relevance == vect_used_by_reduction
9372 || relevance == vect_unused_in_scope
9373 || relevance == vect_used_only_live));
9374 break;
9376 case vect_induction_def:
9377 gcc_assert (!bb_vinfo);
9378 break;
9380 case vect_constant_def:
9381 case vect_external_def:
9382 case vect_unknown_def_type:
9383 default:
9384 gcc_unreachable ();
9387 if (STMT_VINFO_RELEVANT_P (stmt_info))
9389 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9390 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9391 || (is_gimple_call (stmt)
9392 && gimple_call_lhs (stmt) == NULL_TREE));
9393 *need_to_vectorize = true;
9396 if (PURE_SLP_STMT (stmt_info) && !node)
9398 dump_printf_loc (MSG_NOTE, vect_location,
9399 "handled only by SLP analysis\n");
9400 return true;
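/* Try each vectorizable_* routine in turn. With a null gsi and vec_stmt
   these calls only analyze STMT and record the chosen STMT_VINFO_TYPE;
   no vector code is emitted yet.  */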
9403 ok = true;
9404 if (!bb_vinfo
9405 && (STMT_VINFO_RELEVANT_P (stmt_info)
9406 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9407 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9408 || vectorizable_conversion (stmt, NULL, NULL, node)
9409 || vectorizable_shift (stmt, NULL, NULL, node)
9410 || vectorizable_operation (stmt, NULL, NULL, node)
9411 || vectorizable_assignment (stmt, NULL, NULL, node)
9412 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9413 || vectorizable_call (stmt, NULL, NULL, node)
9414 || vectorizable_store (stmt, NULL, NULL, node)
9415 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9416 || vectorizable_induction (stmt, NULL, NULL, node)
9417 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9418 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9419 else
9421 if (bb_vinfo)
9422 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9423 || vectorizable_conversion (stmt, NULL, NULL, node)
9424 || vectorizable_shift (stmt, NULL, NULL, node)
9425 || vectorizable_operation (stmt, NULL, NULL, node)
9426 || vectorizable_assignment (stmt, NULL, NULL, node)
9427 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9428 || vectorizable_call (stmt, NULL, NULL, node)
9429 || vectorizable_store (stmt, NULL, NULL, node)
9430 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9431 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9434 if (!ok)
9436 if (dump_enabled_p ())
9438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9439 "not vectorized: relevant stmt not ");
9440 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9441 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9444 return false;
9447 if (bb_vinfo)
9448 return true;
9450 /* Stmts that are (also) "live" (i.e., used outside the loop)
9451 need extra handling, except for vectorizable reductions. */
9452 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9453 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9455 if (dump_enabled_p ())
9457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9458 "not vectorized: live stmt not supported: ");
9459 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9462 return false;
9465 return true;
9469 /* Function vect_transform_stmt.
9471 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9473 bool
9474 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9475 bool *grouped_store, slp_tree slp_node,
9476 slp_instance slp_node_instance)
9478 bool is_store = false;
9479 gimple *vec_stmt = NULL;
9480 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9481 bool done;
9483 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9484 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9486 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9487 && nested_in_vect_loop_p
9488 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9489 stmt));
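/* Dispatch on the kind of operation recorded for STMT during analysis;
   the vectorizable_* routines now emit the vector statements at GSI and
   return the new vector stmt in VEC_STMT.  */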
9491 switch (STMT_VINFO_TYPE (stmt_info))
9493 case type_demotion_vec_info_type:
9494 case type_promotion_vec_info_type:
9495 case type_conversion_vec_info_type:
9496 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9497 gcc_assert (done);
9498 break;
9500 case induc_vec_info_type:
9501 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9502 gcc_assert (done);
9503 break;
9505 case shift_vec_info_type:
9506 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9507 gcc_assert (done);
9508 break;
9510 case op_vec_info_type:
9511 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9512 gcc_assert (done);
9513 break;
9515 case assignment_vec_info_type:
9516 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9517 gcc_assert (done);
9518 break;
9520 case load_vec_info_type:
9521 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9522 slp_node_instance);
9523 gcc_assert (done);
9524 break;
9526 case store_vec_info_type:
9527 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9528 gcc_assert (done);
9529 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9531 /* In case of interleaving, the whole chain is vectorized when the
9532 last store in the chain is reached. Store stmts before the last
9533 one are skipped, and their vec_stmt_info shouldn't be freed
9534 meanwhile. */
9535 *grouped_store = true;
9536 stmt_vec_info group_info
9537 = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9538 if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9539 is_store = true;
9541 else
9542 is_store = true;
9543 break;
9545 case condition_vec_info_type:
9546 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9547 gcc_assert (done);
9548 break;
9550 case comparison_vec_info_type:
9551 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9552 gcc_assert (done);
9553 break;
9555 case call_vec_info_type:
9556 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9557 stmt = gsi_stmt (*gsi);
9558 break;
9560 case call_simd_clone_vec_info_type:
9561 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9562 stmt = gsi_stmt (*gsi);
9563 break;
9565 case reduc_vec_info_type:
9566 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9567 slp_node_instance);
9568 gcc_assert (done);
9569 break;
9571 default:
9572 if (!STMT_VINFO_LIVE_P (stmt_info))
9574 if (dump_enabled_p ())
9575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9576 "stmt not supported.\n");
9577 gcc_unreachable ();
9581 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9582 This would break hybrid SLP vectorization. */
9583 if (slp_node)
9584 gcc_assert (!vec_stmt
9585 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9587 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9588 is being vectorized, but outside the immediately enclosing loop. */
9589 if (vec_stmt
9590 && nested_p
9591 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9592 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9593 || STMT_VINFO_RELEVANT (stmt_info) ==
9594 vect_used_in_outer_by_reduction))
9596 struct loop *innerloop = LOOP_VINFO_LOOP (
9597 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9598 imm_use_iterator imm_iter;
9599 use_operand_p use_p;
9600 tree scalar_dest;
9601 gimple *exit_phi;
9603 if (dump_enabled_p ())
9604 dump_printf_loc (MSG_NOTE, vect_location,
9605 "Record the vdef for outer-loop vectorization.\n");
9607 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9608 (to be used when vectorizing outer-loop stmts that use the DEF of
9609 STMT). */
9610 if (gimple_code (stmt) == GIMPLE_PHI)
9611 scalar_dest = PHI_RESULT (stmt);
9612 else
9613 scalar_dest = gimple_assign_lhs (stmt);
9615 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9617 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9619 exit_phi = USE_STMT (use_p);
9620 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9625 /* Handle stmts whose DEF is used outside the loop-nest that is
9626 being vectorized. */
9627 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9629 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9630 gcc_assert (done);
9633 if (vec_stmt)
9634 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9636 return is_store;
9640 /* Remove a group of stores (for SLP or interleaving), free their
9641 stmt_vec_info. */
9643 void
9644 vect_remove_stores (gimple *first_stmt)
9646 gimple *next = first_stmt;
9647 gimple *tmp;
9648 gimple_stmt_iterator next_si;
9650 while (next)
9652 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9654 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9655 if (is_pattern_stmt_p (stmt_info))
9656 next = STMT_VINFO_RELATED_STMT (stmt_info);
9657 /* Free the attached stmt_vec_info and remove the stmt. */
9658 next_si = gsi_for_stmt (next);
9659 unlink_stmt_vdef (next);
9660 gsi_remove (&next_si, true);
9661 release_defs (next);
9662 free_stmt_vec_info (next);
9663 next = tmp;
9668 /* Function new_stmt_vec_info.
9670 Create and initialize a new stmt_vec_info struct for STMT. */
9672 stmt_vec_info
9673 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9675 stmt_vec_info res;
9676 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9678 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9679 STMT_VINFO_STMT (res) = stmt;
9680 res->vinfo = vinfo;
9681 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9682 STMT_VINFO_LIVE_P (res) = false;
9683 STMT_VINFO_VECTYPE (res) = NULL;
9684 STMT_VINFO_VEC_STMT (res) = NULL;
9685 STMT_VINFO_VECTORIZABLE (res) = true;
9686 STMT_VINFO_IN_PATTERN_P (res) = false;
9687 STMT_VINFO_RELATED_STMT (res) = NULL;
9688 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9689 STMT_VINFO_DATA_REF (res) = NULL;
9690 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9691 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9693 if (gimple_code (stmt) == GIMPLE_PHI
9694 && is_loop_header_bb_p (gimple_bb (stmt)))
9695 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9696 else
9697 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9699 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9700 STMT_SLP_TYPE (res) = loop_vect;
9701 STMT_VINFO_NUM_SLP_USES (res) = 0;
9703 GROUP_FIRST_ELEMENT (res) = NULL;
9704 GROUP_NEXT_ELEMENT (res) = NULL;
9705 GROUP_SIZE (res) = 0;
9706 GROUP_STORE_COUNT (res) = 0;
9707 GROUP_GAP (res) = 0;
9708 GROUP_SAME_DR_STMT (res) = NULL;
9710 return res;
9714 /* Create the global vector that holds the stmt_vec_info structs. */
9716 void
9717 init_stmt_vec_info_vec (void)
9719 gcc_assert (!stmt_vec_info_vec.exists ());
9720 stmt_vec_info_vec.create (50);
9724 /* Free the global stmt_vec_info vector and the stmt_vec_infos it holds. */
9726 void
9727 free_stmt_vec_info_vec (void)
9729 unsigned int i;
9730 stmt_vec_info info;
9731 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9732 if (info != NULL)
9733 free_stmt_vec_info (STMT_VINFO_STMT (info));
9734 gcc_assert (stmt_vec_info_vec.exists ());
9735 stmt_vec_info_vec.release ();
9739 /* Free stmt vectorization related info. */
9741 void
9742 free_stmt_vec_info (gimple *stmt)
9744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9746 if (!stmt_info)
9747 return;
9749 /* Check if this statement has a related "pattern stmt"
9750 (introduced by the vectorizer during the pattern recognition
9751 pass). Free the pattern's stmt_vec_info and its def stmts'
9752 stmt_vec_infos too. */
9753 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9755 stmt_vec_info patt_info
9756 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9757 if (patt_info)
9759 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9760 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9761 gimple_set_bb (patt_stmt, NULL);
9762 tree lhs = gimple_get_lhs (patt_stmt);
9763 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9764 release_ssa_name (lhs);
9765 if (seq)
9767 gimple_stmt_iterator si;
9768 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9770 gimple *seq_stmt = gsi_stmt (si);
9771 gimple_set_bb (seq_stmt, NULL);
9772 lhs = gimple_get_lhs (seq_stmt);
9773 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9774 release_ssa_name (lhs);
9775 free_stmt_vec_info (seq_stmt);
9778 free_stmt_vec_info (patt_stmt);
9782 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9783 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9784 set_vinfo_for_stmt (stmt, NULL);
9785 free (stmt_info);
9789 /* Function get_vectype_for_scalar_type_and_size.
9791 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9792 by the target. */
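/* For example, on a target whose preferred SIMD width is 16 bytes this
   would typically map a 4-byte integer type to a 4-element vector type;
   a SIZE of zero lets the target choose its preferred vector width.  */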
9794 tree
9795 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9797 tree orig_scalar_type = scalar_type;
9798 scalar_mode inner_mode;
9799 machine_mode simd_mode;
9800 poly_uint64 nunits;
9801 tree vectype;
9803 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9804 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9805 return NULL_TREE;
9807 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9809 /* For vector types of elements whose mode precision doesn't
9810 match their type's precision we use an element type of mode
9811 precision. The vectorization routines will have to make sure
9812 they support the proper result truncation/extension.
9813 We also make sure to build vector types with INTEGER_TYPE
9814 component type only. */
9815 if (INTEGRAL_TYPE_P (scalar_type)
9816 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9817 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9818 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9819 TYPE_UNSIGNED (scalar_type));
9821 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9822 When the component mode passes the above test, simply use a type
9823 corresponding to that mode. The theory is that any use that
9824 would cause problems with this will disable vectorization anyway. */
9825 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9826 && !INTEGRAL_TYPE_P (scalar_type))
9827 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9829 /* We can't build a vector type of elements with alignment bigger than
9830 their size. */
9831 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9832 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9833 TYPE_UNSIGNED (scalar_type));
9835 /* If we fell back to using the mode, fail if there was
9836 no scalar type for it. */
9837 if (scalar_type == NULL_TREE)
9838 return NULL_TREE;
9840 /* If no size was supplied, use the mode the target prefers. Otherwise
9841 look up a vector mode of the specified size. */
9842 if (known_eq (size, 0U))
9843 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9844 else if (!multiple_p (size, nbytes, &nunits)
9845 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9846 return NULL_TREE;
9847 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9848 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9849 return NULL_TREE;
9851 vectype = build_vector_type (scalar_type, nunits);
9853 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9854 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9855 return NULL_TREE;
9857 /* Re-attach the address-space qualifier if we canonicalized the scalar
9858 type. */
9859 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9860 return build_qualified_type
9861 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9863 return vectype;
9866 poly_uint64 current_vector_size;
9868 /* Function get_vectype_for_scalar_type.
9870 Returns the vector type corresponding to SCALAR_TYPE as supported
9871 by the target. */
9873 tree
9874 get_vectype_for_scalar_type (tree scalar_type)
9876 tree vectype;
9877 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9878 current_vector_size);
9879 if (vectype
9880 && known_eq (current_vector_size, 0U))
9881 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9882 return vectype;
9885 /* Function get_mask_type_for_scalar_type.
9887 Returns the mask type corresponding to a result of comparison
9888 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9890 tree
9891 get_mask_type_for_scalar_type (tree scalar_type)
9893 tree vectype = get_vectype_for_scalar_type (scalar_type);
9895 if (!vectype)
9896 return NULL;
9898 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9899 current_vector_size);
9902 /* Function get_same_sized_vectype
9904 Returns a vector type corresponding to SCALAR_TYPE with the same
9905 size as VECTOR_TYPE, if supported by the target. */
9907 tree
9908 get_same_sized_vectype (tree scalar_type, tree vector_type)
9910 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9911 return build_same_sized_truth_vector_type (vector_type);
9913 return get_vectype_for_scalar_type_and_size
9914 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9917 /* Function vect_is_simple_use.
9919 Input:
9920 VINFO - the vect info of the loop or basic block that is being vectorized.
9921 OPERAND - operand in the loop or bb.
9922 Output:
9923 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9924 DT - the type of definition
9926 Returns whether a stmt with OPERAND can be vectorized.
9927 For loops, supportable operands are constants, loop invariants, and operands
9928 that are defined by the current iteration of the loop. Unsupportable
9929 operands are those that are defined by a previous iteration of the loop (as
9930 is the case in reduction/induction computations).
9931 For basic blocks, supportable operands are constants and bb invariants.
9932 For now, operands defined outside the basic block are not supported. */
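/* For example, when vectorizing "a[i] = b[i] + x" in a loop, the SSA
   name holding the loaded b[i] is a vect_internal_def, the loop
   invariant X is a vect_external_def, and a literal addend would be a
   vect_constant_def.  */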
9934 bool
9935 vect_is_simple_use (tree operand, vec_info *vinfo,
9936 gimple **def_stmt, enum vect_def_type *dt)
9938 *def_stmt = NULL;
9939 *dt = vect_unknown_def_type;
9941 if (dump_enabled_p ())
9943 dump_printf_loc (MSG_NOTE, vect_location,
9944 "vect_is_simple_use: operand ");
9945 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9946 dump_printf (MSG_NOTE, "\n");
9949 if (CONSTANT_CLASS_P (operand))
9951 *dt = vect_constant_def;
9952 return true;
9955 if (is_gimple_min_invariant (operand))
9957 *dt = vect_external_def;
9958 return true;
9961 if (TREE_CODE (operand) != SSA_NAME)
9963 if (dump_enabled_p ())
9964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9965 "not ssa-name.\n");
9966 return false;
9969 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9971 *dt = vect_external_def;
9972 return true;
9975 *def_stmt = SSA_NAME_DEF_STMT (operand);
9976 if (dump_enabled_p ())
9978 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9979 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9982 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9983 *dt = vect_external_def;
9984 else
9986 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9987 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9990 if (dump_enabled_p ())
9992 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9993 switch (*dt)
9995 case vect_uninitialized_def:
9996 dump_printf (MSG_NOTE, "uninitialized\n");
9997 break;
9998 case vect_constant_def:
9999 dump_printf (MSG_NOTE, "constant\n");
10000 break;
10001 case vect_external_def:
10002 dump_printf (MSG_NOTE, "external\n");
10003 break;
10004 case vect_internal_def:
10005 dump_printf (MSG_NOTE, "internal\n");
10006 break;
10007 case vect_induction_def:
10008 dump_printf (MSG_NOTE, "induction\n");
10009 break;
10010 case vect_reduction_def:
10011 dump_printf (MSG_NOTE, "reduction\n");
10012 break;
10013 case vect_double_reduction_def:
10014 dump_printf (MSG_NOTE, "double reduction\n");
10015 break;
10016 case vect_nested_cycle:
10017 dump_printf (MSG_NOTE, "nested cycle\n");
10018 break;
10019 case vect_unknown_def_type:
10020 dump_printf (MSG_NOTE, "unknown\n");
10021 break;
10025 if (*dt == vect_unknown_def_type)
10027 if (dump_enabled_p ())
10028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10029 "Unsupported pattern.\n");
10030 return false;
10033 switch (gimple_code (*def_stmt))
10035 case GIMPLE_PHI:
10036 case GIMPLE_ASSIGN:
10037 case GIMPLE_CALL:
10038 break;
10039 default:
10040 if (dump_enabled_p ())
10041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10042 "unsupported defining stmt:\n");
10043 return false;
10046 return true;
10049 /* Function vect_is_simple_use.
10051 Same as vect_is_simple_use but also determines the vector operand
10052 type of OPERAND and stores it to *VECTYPE. If the definition of
10053 OPERAND is vect_uninitialized_def, vect_constant_def or
10054 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
10055 is responsible for computing the best-suited vector type for the
10056 scalar operand. */
10058 bool
10059 vect_is_simple_use (tree operand, vec_info *vinfo,
10060 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10062 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10063 return false;
10065 /* Now get a vector type if the def is internal, otherwise supply
10066 NULL_TREE and leave it up to the caller to figure out a proper
10067 type for the use stmt. */
10068 if (*dt == vect_internal_def
10069 || *dt == vect_induction_def
10070 || *dt == vect_reduction_def
10071 || *dt == vect_double_reduction_def
10072 || *dt == vect_nested_cycle)
10074 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10076 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10077 && !STMT_VINFO_RELEVANT (stmt_info)
10078 && !STMT_VINFO_LIVE_P (stmt_info))
10079 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10081 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10082 gcc_assert (*vectype != NULL_TREE);
10084 else if (*dt == vect_uninitialized_def
10085 || *dt == vect_constant_def
10086 || *dt == vect_external_def)
10087 *vectype = NULL_TREE;
10088 else
10089 gcc_unreachable ();
10091 return true;
10095 /* Function supportable_widening_operation
10097 Check whether an operation represented by the code CODE is a
10098 widening operation that is supported by the target platform in
10099 vector form (i.e., when operating on arguments of type VECTYPE_IN
10100 producing a result of type VECTYPE_OUT).
10102 Widening operations we currently support are NOP (CONVERT), FLOAT
10103 and WIDEN_MULT. This function checks if these operations are supported
10104 by the target platform either directly (via vector tree-codes), or via
10105 target builtins.
10107 Output:
10108 - CODE1 and CODE2 are codes of vector operations to be used when
10109 vectorizing the operation, if available.
10110 - MULTI_STEP_CVT determines the number of required intermediate steps in
10111 case of multi-step conversion (like char->short->int - in that case
10112 MULTI_STEP_CVT will be 1).
10113 - INTERM_TYPES contains the intermediate type required to perform the
10114 widening operation (short in the above example). */
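/* For instance, a single-step widening typically produces a LO/HI (or
   EVEN/ODD) pair of vector operations, because the widened results of
   one input vector fill two output vectors.  */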
10116 bool
10117 supportable_widening_operation (enum tree_code code, gimple *stmt,
10118 tree vectype_out, tree vectype_in,
10119 enum tree_code *code1, enum tree_code *code2,
10120 int *multi_step_cvt,
10121 vec<tree> *interm_types)
10123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10124 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10125 struct loop *vect_loop = NULL;
10126 machine_mode vec_mode;
10127 enum insn_code icode1, icode2;
10128 optab optab1, optab2;
10129 tree vectype = vectype_in;
10130 tree wide_vectype = vectype_out;
10131 enum tree_code c1, c2;
10132 int i;
10133 tree prev_type, intermediate_type;
10134 machine_mode intermediate_mode, prev_mode;
10135 optab optab3, optab4;
10137 *multi_step_cvt = 0;
10138 if (loop_info)
10139 vect_loop = LOOP_VINFO_LOOP (loop_info);
10141 switch (code)
10143 case WIDEN_MULT_EXPR:
10144 /* The result of a vectorized widening operation usually requires
10145 two vectors (because the widened results do not fit into one vector).
10146 The generated vector results would normally be expected to be
10147 generated in the same order as in the original scalar computation,
10148 i.e. if 8 results are generated in each vector iteration, they are
10149 to be organized as follows:
10150 vect1: [res1,res2,res3,res4],
10151 vect2: [res5,res6,res7,res8].
10153 However, in the special case that the result of the widening
10154 operation is used in a reduction computation only, the order doesn't
10155 matter (because when vectorizing a reduction we change the order of
10156 the computation). Some targets can take advantage of this and
10157 generate more efficient code. For example, targets like Altivec,
10158 that support widen_mult using a sequence of {mult_even,mult_odd}
10159 generate the following vectors:
10160 vect1: [res1,res3,res5,res7],
10161 vect2: [res2,res4,res6,res8].
10163 When vectorizing outer-loops, we execute the inner-loop sequentially
10164 (each vectorized inner-loop iteration contributes to VF outer-loop
10165 iterations in parallel). We therefore don't allow changing the
10166 order of the computation in the inner-loop during outer-loop
10167 vectorization. */
10168 /* TODO: Another case in which order doesn't *really* matter is when we
10169 widen and then contract again, e.g. (short)((int)x * y >> 8).
10170 Normally, pack_trunc performs an even/odd permute, whereas the
10171 repack from an even/odd expansion would be an interleave, which
10172 would be significantly simpler for e.g. AVX2. */
10173 /* In any case, in order to avoid duplicating the code below, recurse
10174 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10175 are properly set up for the caller. If we fail, we'll continue with
10176 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10177 if (vect_loop
10178 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10179 && !nested_in_vect_loop_p (vect_loop, stmt)
10180 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10181 stmt, vectype_out, vectype_in,
10182 code1, code2, multi_step_cvt,
10183 interm_types))
10185 /* Elements in a vector with the vect_used_by_reduction property cannot
10186 be reordered if the use chain with this property does not have the
10187 same operation. One such example is s += a * b, where elements
10188 in a and b cannot be reordered. Here we check if the vector defined
10189 by STMT is only directly used in the reduction statement. */
10190 tree lhs = gimple_assign_lhs (stmt);
10191 use_operand_p dummy;
10192 gimple *use_stmt;
10193 stmt_vec_info use_stmt_info = NULL;
10194 if (single_imm_use (lhs, &dummy, &use_stmt)
10195 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10196 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10197 return true;
10199 c1 = VEC_WIDEN_MULT_LO_EXPR;
10200 c2 = VEC_WIDEN_MULT_HI_EXPR;
10201 break;
10203 case DOT_PROD_EXPR:
10204 c1 = DOT_PROD_EXPR;
10205 c2 = DOT_PROD_EXPR;
10206 break;
10208 case SAD_EXPR:
10209 c1 = SAD_EXPR;
10210 c2 = SAD_EXPR;
10211 break;
10213 case VEC_WIDEN_MULT_EVEN_EXPR:
10214 /* Support the recursion induced just above. */
10215 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10216 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10217 break;
10219 case WIDEN_LSHIFT_EXPR:
10220 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10221 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10222 break;
10224 CASE_CONVERT:
10225 c1 = VEC_UNPACK_LO_EXPR;
10226 c2 = VEC_UNPACK_HI_EXPR;
10227 break;
10229 case FLOAT_EXPR:
10230 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10231 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10232 break;
10234 case FIX_TRUNC_EXPR:
10235 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10236 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10237 computing the operation. */
10238 return false;
10240 default:
10241 gcc_unreachable ();
10244 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10245 std::swap (c1, c2);
10247 if (code == FIX_TRUNC_EXPR)
10249 /* The signedness is determined from the output operand. */
10250 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10251 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10253 else
10255 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10256 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10259 if (!optab1 || !optab2)
10260 return false;
10262 vec_mode = TYPE_MODE (vectype);
10263 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10264 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10265 return false;
10267 *code1 = c1;
10268 *code2 = c2;
10270 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10271 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10272 /* For scalar masks we may have different boolean
10273 vector types having the same QImode. Thus we
10274 add an additional check on the number of elements. */
10275 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10276 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10277 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10279 /* Check if it's a multi-step conversion that can be done using intermediate
10280 types. */
10282 prev_type = vectype;
10283 prev_mode = vec_mode;
10285 if (!CONVERT_EXPR_CODE_P (code))
10286 return false;
10288 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10289 intermediate steps in the promotion sequence. We try
10290 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10291 not. */
10292 interm_types->create (MAX_INTERM_CVT_STEPS);
10293 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10295 intermediate_mode = insn_data[icode1].operand[0].mode;
10296 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10298 intermediate_type = vect_halve_mask_nunits (prev_type);
10299 if (intermediate_mode != TYPE_MODE (intermediate_type))
10300 return false;
10302 else
10303 intermediate_type
10304 = lang_hooks.types.type_for_mode (intermediate_mode,
10305 TYPE_UNSIGNED (prev_type));
10307 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10308 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10310 if (!optab3 || !optab4
10311 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10312 || insn_data[icode1].operand[0].mode != intermediate_mode
10313 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10314 || insn_data[icode2].operand[0].mode != intermediate_mode
10315 || ((icode1 = optab_handler (optab3, intermediate_mode))
10316 == CODE_FOR_nothing)
10317 || ((icode2 = optab_handler (optab4, intermediate_mode))
10318 == CODE_FOR_nothing))
10319 break;
10321 interm_types->quick_push (intermediate_type);
10322 (*multi_step_cvt)++;
10324 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10325 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10326 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10327 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10328 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10330 prev_type = intermediate_type;
10331 prev_mode = intermediate_mode;
10334 interm_types->release ();
10335 return false;
10339 /* Function supportable_narrowing_operation
10341 Check whether an operation represented by the code CODE is a
10342 narrowing operation that is supported by the target platform in
10343 vector form (i.e., when operating on arguments of type VECTYPE_IN
10344 and producing a result of type VECTYPE_OUT).
10346 Narrowing operations we currently support are NOP (CONVERT) and
10347 FIX_TRUNC. This function checks if these operations are supported by
10348 the target platform directly via vector tree-codes.
10350 Output:
10351 - CODE1 is the code of a vector operation to be used when
10352 vectorizing the operation, if available.
10353 - MULTI_STEP_CVT determines the number of required intermediate steps in
10354 case of multi-step conversion (like int->short->char - in that case
10355 MULTI_STEP_CVT will be 1).
10356 - INTERM_TYPES contains the intermediate type required to perform the
10357 narrowing operation (short in the above example). */
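/* For instance, a direct narrowing packs two input vectors into one
   output vector using VEC_PACK_TRUNC_EXPR, while int->char goes through
   an intermediate short step, giving MULTI_STEP_CVT == 1.  */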
10359 bool
10360 supportable_narrowing_operation (enum tree_code code,
10361 tree vectype_out, tree vectype_in,
10362 enum tree_code *code1, int *multi_step_cvt,
10363 vec<tree> *interm_types)
10365 machine_mode vec_mode;
10366 enum insn_code icode1;
10367 optab optab1, interm_optab;
10368 tree vectype = vectype_in;
10369 tree narrow_vectype = vectype_out;
10370 enum tree_code c1;
10371 tree intermediate_type, prev_type;
10372 machine_mode intermediate_mode, prev_mode;
10373 int i;
10374 bool uns;
10376 *multi_step_cvt = 0;
10377 switch (code)
10379 CASE_CONVERT:
10380 c1 = VEC_PACK_TRUNC_EXPR;
10381 break;
10383 case FIX_TRUNC_EXPR:
10384 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10385 break;
10387 case FLOAT_EXPR:
10388 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10389 tree code and optabs used for computing the operation. */
10390 return false;
10392 default:
10393 gcc_unreachable ();
10396 if (code == FIX_TRUNC_EXPR)
10397 /* The signedness is determined from the output operand. */
10398 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10399 else
10400 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10402 if (!optab1)
10403 return false;
10405 vec_mode = TYPE_MODE (vectype);
10406 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10407 return false;
10409 *code1 = c1;
10411 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10412 /* For scalar masks we may have different boolean
10413 vector types having the same QImode. Thus we
10414 add an additional check on the number of elements. */
10415 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10416 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10417 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10419 /* Check if it's a multi-step conversion that can be done using intermediate
10420 types. */
10421 prev_mode = vec_mode;
10422 prev_type = vectype;
10423 if (code == FIX_TRUNC_EXPR)
10424 uns = TYPE_UNSIGNED (vectype_out);
10425 else
10426 uns = TYPE_UNSIGNED (vectype);
10428 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10429 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10430 costly than signed. */
10431 if (code == FIX_TRUNC_EXPR && uns)
10433 enum insn_code icode2;
10435 intermediate_type
10436 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10437 interm_optab
10438 = optab_for_tree_code (c1, intermediate_type, optab_default);
10439 if (interm_optab != unknown_optab
10440 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10441 && insn_data[icode1].operand[0].mode
10442 == insn_data[icode2].operand[0].mode)
10444 uns = false;
10445 optab1 = interm_optab;
10446 icode1 = icode2;
10450 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10451 intermediate steps in the narrowing sequence. We try
10452 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10453 interm_types->create (MAX_INTERM_CVT_STEPS);
10454 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10456 intermediate_mode = insn_data[icode1].operand[0].mode;
10457 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10459 intermediate_type = vect_double_mask_nunits (prev_type);
10460 if (intermediate_mode != TYPE_MODE (intermediate_type))
10461 return false;
10463 else
10464 intermediate_type
10465 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10466 interm_optab
10467 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10468 optab_default);
10469 if (!interm_optab
10470 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10471 || insn_data[icode1].operand[0].mode != intermediate_mode
10472 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10473 == CODE_FOR_nothing))
10474 break;
10476 interm_types->quick_push (intermediate_type);
10477 (*multi_step_cvt)++;
10479 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10480 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10481 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10482 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10484 prev_mode = intermediate_mode;
10485 prev_type = intermediate_type;
10486 optab1 = interm_optab;
10489 interm_types->release ();
10490 return false;
10493 /* Generate and return a statement that sets vector mask MASK such that
10494 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
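/* For example, with START_INDEX 3, END_INDEX 5 and a 4-element MASK the
   result is { 1, 1, 0, 0 }: element I is active iff I < END_INDEX -
   START_INDEX.  */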
10496 gcall *
10497 vect_gen_while (tree mask, tree start_index, tree end_index)
10499 tree cmp_type = TREE_TYPE (start_index);
10500 tree mask_type = TREE_TYPE (mask);
10501 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10502 cmp_type, mask_type,
10503 OPTIMIZE_FOR_SPEED));
10504 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10505 start_index, end_index,
10506 build_zero_cst (mask_type));
10507 gimple_call_set_lhs (call, mask);
10508 return call;
10511 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10512 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
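/* Equivalently, element I of the result is true iff
   I + START_INDEX >= END_INDEX.  */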
10514 tree
10515 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10516 tree end_index)
10518 tree tmp = make_ssa_name (mask_type);
10519 gcall *call = vect_gen_while (tmp, start_index, end_index);
10520 gimple_seq_add_stmt (seq, call);
10521 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);