gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
57 /* Return the vectorized type for the given statement. */
59 tree
60 stmt_vectype (struct _stmt_vec_info *stmt_info)
62 return STMT_VINFO_VECTYPE (stmt_info);
65 /* Return TRUE iff the given statement is in an inner loop relative to
66 the loop being vectorized. */
67 bool
68 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
70 gimple *stmt = STMT_VINFO_STMT (stmt_info);
71 basic_block bb = gimple_bb (stmt);
72 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
73 struct loop* loop;
75 if (!loop_vinfo)
76 return false;
78 loop = LOOP_VINFO_LOOP (loop_vinfo);
80 return (bb->loop_father == loop->inner);
83 /* Record the cost of a statement, either by directly informing the
84 target model or by saving it in a vector for later processing.
85 Return a preliminary estimate of the statement's cost. */
87 unsigned
88 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
89 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
90 int misalign, enum vect_cost_model_location where)
92 if ((kind == vector_load || kind == unaligned_load)
93 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
94 kind = vector_gather_load;
95 if ((kind == vector_store || kind == unaligned_store)
96 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
97 kind = vector_scatter_store;
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
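/* Note on the two paths above: when BODY_COST_VEC is supplied the cost is
   only recorded for later processing and the return value is a provisional
   estimate (builtin_vectorization_cost times COUNT); otherwise the cost is
   handed straight to the target via add_stmt_cost.  Gather/scatter accesses
   are reclassified first so the target can price them separately.  */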
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
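/* Note: relevance can only increase here (the larger of the old and new
   values is kept) and the live flag is OR-ed in, so a statement is pushed
   back onto the worklist only when one of the two actually changes.  For a
   statement that was replaced by a pattern, the pattern statement is the
   one that gets marked and queued.  */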
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
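/* Note: uses outside the loop only mark the statement "live"; because the
   loop is in loop-closed SSA form such uses are expected to appear in the
   loop exit PHIs (debug uses are ignored).  A statement that is live but not
   otherwise relevant is promoted to vect_used_only_live unless it is a
   simple assignment whose operands are all loop-invariant.  */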
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it's one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. Unless the PHI is live
585 of course. */
586 else if (gimple_code (stmt) == GIMPLE_PHI
587 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
588 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
589 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
590 == use))
592 if (dump_enabled_p ())
593 dump_printf_loc (MSG_NOTE, vect_location,
594 "induction value on backedge.\n");
595 return true;
599 vect_mark_relevant (worklist, def_stmt, relevant, false);
600 return true;
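/* Note: process_use leaves DEF_STMT alone when USE is only used for address
   computation (case 1) or when a reduction PHI is fed by a reduction
   statement in the same nest (case 2, which is only sanity-checked).  When
   the definition and the use sit in different loops of the nest (cases
   3a/3b), RELEVANT is translated to the corresponding inner/outer value
   before DEF_STMT is marked.  Backedge uses of an induction PHI that is not
   live are skipped so the IV increment is not needlessly vectorized.  */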
604 /* Function vect_mark_stmts_to_be_vectorized.
606 Not all stmts in the loop need to be vectorized. For example:
608 for i...
609 for j...
610 1. T0 = i + j
611 2. T1 = a[T0]
613 3. j = j + 1
615 Stmts 1 and 3 do not need to be vectorized, because loop control and
616 addressing of vectorized data-refs are handled differently.
618 This pass detects such stmts. */
620 bool
621 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
624 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
625 unsigned int nbbs = loop->num_nodes;
626 gimple_stmt_iterator si;
627 gimple *stmt;
628 unsigned int i;
629 stmt_vec_info stmt_vinfo;
630 basic_block bb;
631 gimple *phi;
632 bool live_p;
633 enum vect_relevant relevant;
635 if (dump_enabled_p ())
636 dump_printf_loc (MSG_NOTE, vect_location,
637 "=== vect_mark_stmts_to_be_vectorized ===\n");
639 auto_vec<gimple *, 64> worklist;
641 /* 1. Init worklist. */
642 for (i = 0; i < nbbs; i++)
644 bb = bbs[i];
645 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 phi = gsi_stmt (si);
648 if (dump_enabled_p ())
650 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
654 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
655 vect_mark_relevant (&worklist, phi, relevant, live_p);
657 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 stmt = gsi_stmt (si);
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
666 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
667 vect_mark_relevant (&worklist, stmt, relevant, live_p);
671 /* 2. Process_worklist */
672 while (worklist.length () > 0)
674 use_operand_p use_p;
675 ssa_op_iter iter;
677 stmt = worklist.pop ();
678 if (dump_enabled_p ())
680 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
681 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
684 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
685 (DEF_STMT) as relevant/irrelevant according to the relevance property
686 of STMT. */
687 stmt_vinfo = vinfo_for_stmt (stmt);
688 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
691 propagated as is to the DEF_STMTs of its USEs.
693 One exception is when STMT has been identified as defining a reduction
694 variable; in this case we set the relevance to vect_used_by_reduction.
695 This is because we distinguish between two kinds of relevant stmts -
696 those that are used by a reduction computation, and those that are
697 (also) used by a regular computation. This allows us later on to
698 identify stmts that are used solely by a reduction, and therefore the
699 order of the results that they produce does not have to be kept. */
701 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703 case vect_reduction_def:
704 gcc_assert (relevant != vect_unused_in_scope);
705 if (relevant != vect_unused_in_scope
706 && relevant != vect_used_in_scope
707 && relevant != vect_used_by_reduction
708 && relevant != vect_used_only_live)
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
712 "unsupported use of reduction.\n");
713 return false;
715 break;
717 case vect_nested_cycle:
718 if (relevant != vect_unused_in_scope
719 && relevant != vect_used_in_outer_by_reduction
720 && relevant != vect_used_in_outer)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of nested cycle.\n");
726 return false;
728 break;
730 case vect_double_reduction_def:
731 if (relevant != vect_unused_in_scope
732 && relevant != vect_used_by_reduction
733 && relevant != vect_used_only_live)
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "unsupported use of double reduction.\n");
739 return false;
741 break;
743 default:
744 break;
747 if (is_pattern_stmt_p (stmt_vinfo))
749 /* Pattern statements are not inserted into the code, so
750 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
751 have to scan the RHS or function arguments instead. */
752 if (is_gimple_assign (stmt))
754 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
755 tree op = gimple_assign_rhs1 (stmt);
757 i = 1;
758 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
761 relevant, &worklist, false)
762 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
763 relevant, &worklist, false))
764 return false;
765 i = 2;
767 for (; i < gimple_num_ops (stmt); i++)
769 op = gimple_op (stmt, i);
770 if (TREE_CODE (op) == SSA_NAME
771 && !process_use (stmt, op, loop_vinfo, relevant,
772 &worklist, false))
773 return false;
776 else if (is_gimple_call (stmt))
778 for (i = 0; i < gimple_call_num_args (stmt); i++)
780 tree arg = gimple_call_arg (stmt, i);
781 if (!process_use (stmt, arg, loop_vinfo, relevant,
782 &worklist, false))
783 return false;
787 else
788 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790 tree op = USE_FROM_PTR (use_p);
791 if (!process_use (stmt, op, loop_vinfo, relevant,
792 &worklist, false))
793 return false;
796 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 gather_scatter_info gs_info;
799 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
800 gcc_unreachable ();
801 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
802 &worklist, true))
803 return false;
805 } /* while worklist */
807 return true;
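/* Note: the function above is a straightforward worklist fixed point.  The
   worklist is seeded with the statements that are relevant by themselves
   (vect_stmt_relevant_p), and relevance is then propagated to the defining
   statements of their uses via process_use.  Pattern statements are walked
   through their RHS or call arguments because their operands are not in the
   SSA operand caches, and the offset operand of a gather/scatter access is
   always processed (FORCE == true) even though it only feeds the address.  */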
811 /* Function vect_model_simple_cost.
813 Models cost for simple operations, i.e. those that only emit ncopies of a
814 single op. Right now, this does not account for multiple insns that could
815 be generated for the single vector op. We will handle that shortly. */
817 void
818 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
819 enum vect_def_type *dt,
820 int ndts,
821 stmt_vector_for_cost *prologue_cost_vec,
822 stmt_vector_for_cost *body_cost_vec)
824 int i;
825 int inside_cost = 0, prologue_cost = 0;
827 /* The SLP costs were already calculated during SLP tree build. */
828 if (PURE_SLP_STMT (stmt_info))
829 return;
831 /* Cost the "broadcast" of a scalar operand into a vector operand.
832 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833 cost model. */
834 for (i = 0; i < ndts; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 stmt_info, 0, vect_prologue);
839 /* Pass the inside-of-loop statements to the target-specific cost model. */
840 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 stmt_info, 0, vect_body);
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location,
845 "vect_model_simple_cost: inside_cost = %d, "
846 "prologue_cost = %d .\n", inside_cost, prologue_cost);
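/* Note: for a simple statement the prologue is charged one scalar_to_vec
   (broadcast) per constant or external operand and the loop body is charged
   NCOPIES vector_stmt operations; pure SLP statements return early because
   their cost was already accounted for when the SLP tree was built.  */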
850 /* Model cost for type demotion and promotion operations. PWR is normally
851 zero for single-step promotions and demotions. It will be one if
852 two-step promotion/demotion is required, and so on. Each additional
853 step doubles the number of instructions required. */
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 enum vect_def_type *dt, int pwr)
859 int i, tmp;
860 int inside_cost = 0, prologue_cost = 0;
861 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863 void *target_cost_data;
865 /* The SLP costs were already calculated during SLP tree build. */
866 if (PURE_SLP_STMT (stmt_info))
867 return;
869 if (loop_vinfo)
870 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
871 else
872 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
874 for (i = 0; i < pwr + 1; i++)
876 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
877 (i + 1) : i;
878 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
879 vec_promote_demote, stmt_info, 0,
880 vect_body);
883 /* FORNOW: Assuming maximum 2 args per stmts. */
884 for (i = 0; i < 2; i++)
885 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
886 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
887 stmt_info, 0, vect_prologue);
889 if (dump_enabled_p ())
890 dump_printf_loc (MSG_NOTE, vect_location,
891 "vect_model_promotion_demotion_cost: inside_cost = %d, "
892 "prologue_cost = %d .\n", inside_cost, prologue_cost);
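/* Note: each additional promotion/demotion step doubles the number of
   vec_promote_demote operations (vect_pow2).  For example, a two-step
   promotion (PWR == 1) is charged 2**1 + 2**2 = 6 operations, while a
   two-step demotion is charged 2**0 + 2**1 = 3; the extra factor for
   promotions matches widening producing two result vectors per input
   vector.  Constant or external operands (at most two are assumed) each
   add one vector_stmt to the prologue.  */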
895 /* Function vect_model_store_cost
897 Models cost for stores. In the case of grouped accesses, one access
898 has the overhead of the grouped access attributed to it. */
900 void
901 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
902 vect_memory_access_type memory_access_type,
903 vec_load_store_type vls_type, slp_tree slp_node,
904 stmt_vector_for_cost *prologue_cost_vec,
905 stmt_vector_for_cost *body_cost_vec)
907 unsigned int inside_cost = 0, prologue_cost = 0;
908 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
909 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
910 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
912 if (vls_type == VLS_STORE_INVARIANT)
913 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
914 stmt_info, 0, vect_prologue);
916 /* Grouped stores update all elements in the group at once,
917 so we want the DR for the first statement. */
918 if (!slp_node && grouped_access_p)
920 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
921 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
924 /* True if we should include any once-per-group costs as well as
925 the cost of the statement itself. For SLP we only get called
926 once per group anyhow. */
927 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
929 /* We assume that the cost of a single store-lanes instruction is
930 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
931 access is instead being provided by a permute-and-store operation,
932 include the cost of the permutes. */
933 if (first_stmt_p
934 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
936 /* Uses high and low interleave or shuffle operations for each
937 needed permute. */
938 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
939 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
940 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
941 stmt_info, 0, vect_body);
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_NOTE, vect_location,
945 "vect_model_store_cost: strided group_size = %d .\n",
946 group_size);
949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
950 /* Costs of the stores. */
951 if (memory_access_type == VMAT_ELEMENTWISE
952 || memory_access_type == VMAT_GATHER_SCATTER)
954 /* N scalar stores plus extracting the elements. */
955 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
956 inside_cost += record_stmt_cost (body_cost_vec,
957 ncopies * assumed_nunits,
958 scalar_store, stmt_info, 0, vect_body);
960 else
961 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
963 if (memory_access_type == VMAT_ELEMENTWISE
964 || memory_access_type == VMAT_STRIDED_SLP)
966 /* N scalar stores plus extracting the elements. */
967 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
968 inside_cost += record_stmt_cost (body_cost_vec,
969 ncopies * assumed_nunits,
970 vec_to_scalar, stmt_info, 0, vect_body);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_store_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
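/* Note: for VMAT_CONTIGUOUS_PERMUTE the interleaving cost is
   NCOPIES * ceil_log2 (GROUP_SIZE) * GROUP_SIZE vec_perm operations; e.g. a
   group of 4 stores with NCOPIES == 1 is charged 2 * 4 = 8 permutes.
   Elementwise and gather/scatter stores are instead charged one scalar_store
   per vector element, and elementwise/strided-SLP stores additionally pay
   vec_to_scalar extracts for getting the elements out of the vectors.  */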
980 /* Calculate cost of DR's memory access. */
981 void
982 vect_get_store_cost (struct data_reference *dr, int ncopies,
983 unsigned int *inside_cost,
984 stmt_vector_for_cost *body_cost_vec)
986 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
987 gimple *stmt = DR_STMT (dr);
988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
990 switch (alignment_support_scheme)
992 case dr_aligned:
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 vector_store, stmt_info, 0,
996 vect_body);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: aligned.\n");
1001 break;
1004 case dr_unaligned_supported:
1006 /* Here, we assign an additional cost for the unaligned store. */
1007 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1008 unaligned_store, stmt_info,
1009 DR_MISALIGNMENT (dr), vect_body);
1010 if (dump_enabled_p ())
1011 dump_printf_loc (MSG_NOTE, vect_location,
1012 "vect_model_store_cost: unaligned supported by "
1013 "hardware.\n");
1014 break;
1017 case dr_unaligned_unsupported:
1019 *inside_cost = VECT_MAX_COST;
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1023 "vect_model_store_cost: unsupported access.\n");
1024 break;
1027 default:
1028 gcc_unreachable ();
1033 /* Function vect_model_load_cost
1035 Models cost for loads. In the case of grouped accesses, one access has
1036 the overhead of the grouped access attributed to it. Since unaligned
1037 accesses are supported for loads, we also account for the costs of the
1038 access scheme chosen. */
1040 void
1041 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1042 vect_memory_access_type memory_access_type,
1043 slp_tree slp_node,
1044 stmt_vector_for_cost *prologue_cost_vec,
1045 stmt_vector_for_cost *body_cost_vec)
1047 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1048 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1049 unsigned int inside_cost = 0, prologue_cost = 0;
1050 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1052 /* Grouped loads read all elements in the group at once,
1053 so we want the DR for the first statement. */
1054 if (!slp_node && grouped_access_p)
1056 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1057 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1060 /* True if we should include any once-per-group costs as well as
1061 the cost of the statement itself. For SLP we only get called
1062 once per group anyhow. */
1063 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1065 /* We assume that the cost of a single load-lanes instruction is
1066 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1067 access is instead being provided by a load-and-permute operation,
1068 include the cost of the permutes. */
1069 if (first_stmt_p
1070 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1072 /* Uses even and odd extract operations or shuffle operations
1073 for each needed permute. */
1074 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1075 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1076 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .\n",
1082 group_size);
1085 /* The loads themselves. */
1086 if (memory_access_type == VMAT_ELEMENTWISE
1087 || memory_access_type == VMAT_GATHER_SCATTER)
1089 /* N scalar loads plus gathering them into a vector. */
1090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1091 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1092 inside_cost += record_stmt_cost (body_cost_vec,
1093 ncopies * assumed_nunits,
1094 scalar_load, stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (dr, ncopies, first_stmt_p,
1098 &inside_cost, &prologue_cost,
1099 prologue_cost_vec, body_cost_vec, true);
1100 if (memory_access_type == VMAT_ELEMENTWISE
1101 || memory_access_type == VMAT_STRIDED_SLP)
1102 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
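/* Note: loads use the same NCOPIES * ceil_log2 (GROUP_SIZE) * GROUP_SIZE
   permute count as stores.  Elementwise and gather/scatter loads are charged
   one scalar_load per vector element, and elementwise/strided-SLP loads pay
   an additional vec_construct per copy for assembling the loaded scalars
   into a vector.  */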
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_load_cost (struct data_reference *dr, int ncopies,
1115 bool add_realign_cost, unsigned int *inside_cost,
1116 unsigned int *prologue_cost,
1117 stmt_vector_for_cost *prologue_cost_vec,
1118 stmt_vector_for_cost *body_cost_vec,
1119 bool record_prologue_costs)
1121 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1122 gimple *stmt = DR_STMT (dr);
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1125 switch (alignment_support_scheme)
1127 case dr_aligned:
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1130 stmt_info, 0, vect_body);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_load_cost: aligned.\n");
1136 break;
1138 case dr_unaligned_supported:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_load, stmt_info,
1143 DR_MISALIGNMENT (dr), vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned supported by "
1148 "hardware.\n");
1150 break;
1152 case dr_explicit_realign:
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1155 vector_load, stmt_info, 0, vect_body);
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 vec_perm, stmt_info, 0, vect_body);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1161 prologue costs. */
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: explicit realign\n");
1170 break;
1172 case dr_explicit_realign_optimized:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: unaligned software "
1177 "pipelined.\n");
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost && record_prologue_costs)
1188 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1189 vector_stmt, stmt_info,
1190 0, vect_prologue);
1191 if (targetm.vectorize.builtin_mask_for_load)
1192 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1193 vector_stmt, stmt_info,
1194 0, vect_prologue);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1198 stmt_info, 0, vect_body);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: explicit realign optimized"
1205 "\n");
1207 break;
1210 case dr_unaligned_unsupported:
1212 *inside_cost = VECT_MAX_COST;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1216 "vect_model_load_cost: unsupported access.\n");
1217 break;
1220 default:
1221 gcc_unreachable ();
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1228 static void
1229 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1231 if (gsi)
1232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1233 else
1235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1238 if (loop_vinfo)
1240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1241 basic_block new_bb;
1242 edge pe;
1244 if (nested_in_vect_loop_p (loop, stmt))
1245 loop = loop->inner;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1249 gcc_assert (!new_bb);
1251 else
1253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1254 basic_block bb;
1255 gimple_stmt_iterator gsi_bb_start;
1257 gcc_assert (bb_vinfo);
1258 bb = BB_VINFO_BB (bb_vinfo);
1259 gsi_bb_start = gsi_after_labels (bb);
1260 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1272 /* Function vect_init_vector.
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276 vector type a vector with all elements equal to VAL is created first.
1277 Place the initialization at BSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
1282 tree
1283 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1285 gimple *init_stmt;
1286 tree new_temp;
1288 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1289 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1291 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1292 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1294 /* A scalar boolean value should be transformed into an
1295 all-zeros or all-ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type))
1298 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1299 tree false_val = build_zero_cst (TREE_TYPE (type));
1301 if (CONSTANT_CLASS_P (val))
1302 val = integer_zerop (val) ? false_val : true_val;
1303 else
1305 new_temp = make_ssa_name (TREE_TYPE (type));
1306 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1307 val, true_val, false_val);
1308 vect_init_vector_1 (stmt, init_stmt, gsi);
1309 val = new_temp;
1312 else if (CONSTANT_CLASS_P (val))
1313 val = fold_convert (TREE_TYPE (type), val);
1314 else
1316 new_temp = make_ssa_name (TREE_TYPE (type));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1318 init_stmt = gimple_build_assign (new_temp,
1319 fold_build1 (VIEW_CONVERT_EXPR,
1320 TREE_TYPE (type),
1321 val));
1322 else
1323 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1328 val = build_vector_from_val (type, val);
1331 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_temp, val);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 return new_temp;
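/* Note: when VAL does not already have the vector's element type it is
   converted first: scalar booleans are canonicalized to all-ones/all-zeros
   (via a COND_EXPR when VAL is not constant), other scalars go through a
   NOP_EXPR or VIEW_CONVERT_EXPR, and the result is then broadcast with
   build_vector_from_val.  vect_init_vector_1 places the init statement at
   GSI when given, otherwise on the (possibly inner) loop preheader edge, or
   after the labels of the basic block for basic-block vectorization.  */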
1337 /* Function vect_get_vec_def_for_operand_1.
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
1342 tree
1343 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1345 tree vec_oprnd;
1346 gimple *vec_stmt;
1347 stmt_vec_info def_stmt_info = NULL;
1349 switch (dt)
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def:
1353 case vect_external_def:
1354 /* Code should use vect_get_vec_def_for_operand. */
1355 gcc_unreachable ();
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def:
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info = vinfo_for_stmt (def_stmt);
1363 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1364 /* Get vectorized pattern statement. */
1365 if (!vec_stmt
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info))
1368 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1370 gcc_assert (vec_stmt);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else if (is_gimple_call (vec_stmt))
1374 vec_oprnd = gimple_call_lhs (vec_stmt);
1375 else
1376 vec_oprnd = gimple_assign_lhs (vec_stmt);
1377 return vec_oprnd;
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def:
1382 case vect_double_reduction_def:
1383 case vect_nested_cycle:
1384 case vect_induction_def:
1386 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info = vinfo_for_stmt (def_stmt);
1390 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1391 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1392 vec_oprnd = PHI_RESULT (vec_stmt);
1393 else
1394 vec_oprnd = gimple_get_lhs (vec_stmt);
1395 return vec_oprnd;
1398 default:
1399 gcc_unreachable ();
1404 /* Function vect_get_vec_def_for_operand.
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
1416 tree
1417 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1425 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1441 if (dt == vect_constant_def || dt == vect_external_def)
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1446 if (vectype)
1447 vector_type = vectype;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1. */
1565 void
1566 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
1569 slp_tree slp_node)
1571 if (slp_node)
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
1577 ops.quick_push (op0);
1578 if (op1)
1579 ops.quick_push (op1);
1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
1583 *vec_oprnds0 = vec_defs[0];
1584 if (op1)
1585 *vec_oprnds1 = vec_defs[1];
1587 else
1589 tree vec_oprnd;
1591 vec_oprnds0->create (1);
1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1593 vec_oprnds0->quick_push (vec_oprnd);
1595 if (op1)
1597 vec_oprnds1->create (1);
1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1599 vec_oprnds1->quick_push (vec_oprnd);
1605 /* Function vect_finish_stmt_generation.
1607 Insert a new stmt. */
1609 void
1610 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1611 gimple_stmt_iterator *gsi)
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1621 gimple *at_stmt = gsi_stmt (*gsi);
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1648 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1654 gimple_set_location (vec_stmt, gimple_location (stmt));
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
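/* Note: besides inserting VEC_STMT before GSI, the function above keeps
   virtual SSA form up to date by hand: the new statement inherits the VUSE
   of the statement at the insertion point, and if it is itself a store (an
   assignment to memory, or a call that is not const/pure/novops) it receives
   a fresh VDEF and the following statement's VUSE is rewired to that VDEF,
   avoiding a run of the SSA renamer.  The scalar statement's location and
   EH region are also copied over.  */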
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
1669 static internal_fn
1670 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
1687 return ifn;
1690 return IFN_LAST;
1694 static tree permute_vec_elements (tree, tree, tree, gimple *,
1695 gimple_stmt_iterator *);
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1701 static int
1702 compare_step_with_zero (gimple *stmt)
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1716 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1718 /* The encoding has a single stepped pattern. */
1719 vec_perm_builder sel (nunits, 1, 3);
1720 for (int i = 0; i < 3; ++i)
1721 sel.quick_push (nunits - 1 - i);
1723 vec_perm_indices indices (sel, 1, nunits);
1724 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1725 return NULL_TREE;
1726 return vect_gen_perm_mask_checked (vectype, indices);
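/* Note: the vec_perm_builder above describes the reversal
   { NUNITS-1, NUNITS-2, ..., 0 } as a single stepped pattern using only its
   first three elements, which also works when NUNITS is not a compile-time
   constant; can_vec_perm_const_p then checks whether the target can perform
   that constant permutation on VECTYPE's mode.  */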
1729 /* STMT is either a masked or unconditional store. Return the value
1730 being stored. */
1732 static tree
1733 vect_get_store_rhs (gimple *stmt)
1735 if (gassign *assign = dyn_cast <gassign *> (stmt))
1737 gcc_assert (gimple_assign_single_p (assign));
1738 return gimple_assign_rhs1 (assign);
1740 if (gcall *call = dyn_cast <gcall *> (stmt))
1742 internal_fn ifn = gimple_call_internal_fn (call);
1743 gcc_assert (ifn == IFN_MASK_STORE);
1744 return gimple_call_arg (stmt, 3);
1746 gcc_unreachable ();
1749 /* A subroutine of get_load_store_type, with a subset of the same
1750 arguments. Handle the case where STMT is part of a grouped load
1751 or store.
1753 For stores, the statements in the group are all consecutive
1754 and there is no gap at the end. For loads, the statements in the
1755 group might not be consecutive; there can be gaps between statements
1756 as well as at the end. */
1758 static bool
1759 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1760 vec_load_store_type vls_type,
1761 vect_memory_access_type *memory_access_type)
1763 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1764 vec_info *vinfo = stmt_info->vinfo;
1765 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1766 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1767 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1768 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1769 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1770 bool single_element_p = (stmt == first_stmt
1771 && !GROUP_NEXT_ELEMENT (stmt_info));
1772 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1773 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1775 /* True if the vectorized statements would access beyond the last
1776 statement in the group. */
1777 bool overrun_p = false;
1779 /* True if we can cope with such overrun by peeling for gaps, so that
1780 there is at least one final scalar iteration after the vector loop. */
1781 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1783 /* There can only be a gap at the end of the group if the stride is
1784 known at compile time. */
1785 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1787 /* Stores can't yet have gaps. */
1788 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1790 if (slp)
1792 if (STMT_VINFO_STRIDED_P (stmt_info))
1794 /* Try to use consecutive accesses of GROUP_SIZE elements,
1795 separated by the stride, until we have a complete vector.
1796 Fall back to scalar accesses if that isn't possible. */
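/* For example, with 4-element vectors a group of size 2 (which divides
   nunits) can use VMAT_STRIDED_SLP, whereas a group of size 3 falls
   back to VMAT_ELEMENTWISE.  */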
1797 if (multiple_p (nunits, group_size))
1798 *memory_access_type = VMAT_STRIDED_SLP;
1799 else
1800 *memory_access_type = VMAT_ELEMENTWISE;
1802 else
1804 overrun_p = loop_vinfo && gap != 0;
1805 if (overrun_p && vls_type != VLS_LOAD)
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "Grouped store with gaps requires"
1809 " non-consecutive accesses\n");
1810 return false;
1812 /* An overrun is fine if the trailing elements are smaller
1813 than the alignment boundary B. Every vector access will
1814 be a multiple of B and so we are guaranteed to access a
1815 non-gap element in the same B-sized block. */
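/* For example, with a known alignment of 16 bytes and 4-byte scalar
   elements, a trailing gap of up to 3 elements can safely be overrun.  */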
1816 if (overrun_p
1817 && gap < (vect_known_alignment_in_bytes (first_dr)
1818 / vect_get_scalar_dr_size (first_dr)))
1819 overrun_p = false;
1820 if (overrun_p && !can_overrun_p)
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1824 "Peeling for outer loop is not supported\n");
1825 return false;
1827 *memory_access_type = VMAT_CONTIGUOUS;
1830 else
1832 /* We can always handle this case using elementwise accesses,
1833 but see if something more efficient is available. */
1834 *memory_access_type = VMAT_ELEMENTWISE;
1836 /* If there is a gap at the end of the group then these optimizations
1837 would access excess elements in the last iteration. */
1838 bool would_overrun_p = (gap != 0);
1839 /* An overrun is fine if the trailing elements are smaller than the
1840 alignment boundary B. Every vector access will be a multiple of B
1841 and so we are guaranteed to access a non-gap element in the
1842 same B-sized block. */
1843 if (would_overrun_p
1844 && gap < (vect_known_alignment_in_bytes (first_dr)
1845 / vect_get_scalar_dr_size (first_dr)))
1846 would_overrun_p = false;
1848 if (!STMT_VINFO_STRIDED_P (stmt_info)
1849 && (can_overrun_p || !would_overrun_p)
1850 && compare_step_with_zero (stmt) > 0)
1852 /* First try using LOAD/STORE_LANES. */
1853 if (vls_type == VLS_LOAD
1854 ? vect_load_lanes_supported (vectype, group_size)
1855 : vect_store_lanes_supported (vectype, group_size))
1857 *memory_access_type = VMAT_LOAD_STORE_LANES;
1858 overrun_p = would_overrun_p;
1861 /* If that fails, try using permuting loads. */
1862 if (*memory_access_type == VMAT_ELEMENTWISE
1863 && (vls_type == VLS_LOAD
1864 ? vect_grouped_load_supported (vectype, single_element_p,
1865 group_size)
1866 : vect_grouped_store_supported (vectype, group_size)))
1868 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1869 overrun_p = would_overrun_p;
1874 if (vls_type != VLS_LOAD && first_stmt == stmt)
1876 /* STMT is the leader of the group. Check the operands of all the
1877 stmts of the group. */
1878 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1879 while (next_stmt)
1881 gcc_assert (gimple_assign_single_p (next_stmt));
1882 tree op = gimple_assign_rhs1 (next_stmt);
1883 gimple *def_stmt;
1884 enum vect_def_type dt;
1885 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1889 "use not simple.\n");
1890 return false;
1892 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1896 if (overrun_p)
1898 gcc_assert (can_overrun_p);
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1901 "Data access with gaps requires scalar "
1902 "epilogue loop\n");
1903 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1906 return true;
1909 /* A subroutine of get_load_store_type, with a subset of the same
1910 arguments. Handle the case where STMT is a load or store that
1911 accesses consecutive elements with a negative step. */
1913 static vect_memory_access_type
1914 get_negative_load_store_type (gimple *stmt, tree vectype,
1915 vec_load_store_type vls_type,
1916 unsigned int ncopies)
1918 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1919 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1920 dr_alignment_support alignment_support_scheme;
1922 if (ncopies > 1)
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1926 "multiple types with negative step.\n");
1927 return VMAT_ELEMENTWISE;
1930 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1931 if (alignment_support_scheme != dr_aligned
1932 && alignment_support_scheme != dr_unaligned_supported)
1934 if (dump_enabled_p ())
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1936 "negative step but alignment required.\n");
1937 return VMAT_ELEMENTWISE;
1940 if (vls_type == VLS_STORE_INVARIANT)
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_NOTE, vect_location,
1944 "negative step with invariant source;"
1945 " no permute needed.\n");
1946 return VMAT_CONTIGUOUS_DOWN;
1949 if (!perm_mask_for_reverse (vectype))
1951 if (dump_enabled_p ())
1952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1953 "negative step and reversing not supported.\n");
1954 return VMAT_ELEMENTWISE;
1957 return VMAT_CONTIGUOUS_REVERSE;
1960 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1961 if there is a memory access type that the vectorized form can use,
1962 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1963 or scatters, fill in GS_INFO accordingly.
1965 SLP says whether we're performing SLP rather than loop vectorization.
1966 VECTYPE is the vector type that the vectorized statements will use.
1967 NCOPIES is the number of vector statements that will be needed. */
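/* The memory access type is chosen in the following order: explicit
   gather/scatter accesses, grouped accesses, strided accesses, and
   finally contiguous accesses classified by the sign of the step
   (negative, zero/invariant or positive).  */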
1969 static bool
1970 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1971 vec_load_store_type vls_type, unsigned int ncopies,
1972 vect_memory_access_type *memory_access_type,
1973 gather_scatter_info *gs_info)
1975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1976 vec_info *vinfo = stmt_info->vinfo;
1977 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1978 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1979 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1981 *memory_access_type = VMAT_GATHER_SCATTER;
1982 gimple *def_stmt;
1983 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1984 gcc_unreachable ();
1985 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1986 &gs_info->offset_dt,
1987 &gs_info->offset_vectype))
1989 if (dump_enabled_p ())
1990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1991 "%s index use not simple.\n",
1992 vls_type == VLS_LOAD ? "gather" : "scatter");
1993 return false;
1996 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1998 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1999 memory_access_type))
2000 return false;
2002 else if (STMT_VINFO_STRIDED_P (stmt_info))
2004 gcc_assert (!slp);
2005 *memory_access_type = VMAT_ELEMENTWISE;
2007 else
2009 int cmp = compare_step_with_zero (stmt);
2010 if (cmp < 0)
2011 *memory_access_type = get_negative_load_store_type
2012 (stmt, vectype, vls_type, ncopies);
2013 else if (cmp == 0)
2015 gcc_assert (vls_type == VLS_LOAD);
2016 *memory_access_type = VMAT_INVARIANT;
2018 else
2019 *memory_access_type = VMAT_CONTIGUOUS;
2022 if ((*memory_access_type == VMAT_ELEMENTWISE
2023 || *memory_access_type == VMAT_STRIDED_SLP)
2024 && !nunits.is_constant ())
2026 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2028 "Not using elementwise accesses due to variable "
2029 "vectorization factor.\n");
2030 return false;
2033 /* FIXME: At the moment the cost model seems to underestimate the
2034 cost of using elementwise accesses. This check preserves the
2035 traditional behavior until that can be fixed. */
2036 if (*memory_access_type == VMAT_ELEMENTWISE
2037 && !STMT_VINFO_STRIDED_P (stmt_info))
2039 if (dump_enabled_p ())
2040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2041 "not falling back to elementwise accesses\n");
2042 return false;
2044 return true;
2047 /* Return true if boolean argument MASK is suitable for vectorizing
2048 conditional load or store STMT. When returning true, store the
2049 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2051 static bool
2052 vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2054 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2056 if (dump_enabled_p ())
2057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2058 "mask argument is not a boolean.\n");
2059 return false;
2062 if (TREE_CODE (mask) != SSA_NAME)
2064 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2066 "mask argument is not an SSA name.\n");
2067 return false;
2070 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2071 gimple *def_stmt;
2072 enum vect_def_type dt;
2073 tree mask_vectype;
2074 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2075 &mask_vectype))
2077 if (dump_enabled_p ())
2078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2079 "mask use not simple.\n");
2080 return false;
2083 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2084 if (!mask_vectype)
2085 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2087 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "could not find an appropriate vector mask type.\n");
2092 return false;
2095 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2096 TYPE_VECTOR_SUBPARTS (vectype)))
2098 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2101 "vector mask type ");
2102 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2103 dump_printf (MSG_MISSED_OPTIMIZATION,
2104 " does not match vector data type ");
2105 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2106 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2108 return false;
2111 *mask_vectype_out = mask_vectype;
2112 return true;
2115 /* Return true if stored value RHS is suitable for vectorizing store
2116 statement STMT. When returning true, store the type of the
2117 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2118 store in *VLS_TYPE_OUT. */
2120 static bool
2121 vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2122 vec_load_store_type *vls_type_out)
2124 /* If this is a store from a constant, make sure
2125 native_encode_expr can handle it. */
2126 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2128 if (dump_enabled_p ())
2129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2130 "cannot encode constant as a byte sequence.\n");
2131 return false;
2134 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2135 gimple *def_stmt;
2136 enum vect_def_type dt;
2137 tree rhs_vectype;
2138 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2139 &rhs_vectype))
2141 if (dump_enabled_p ())
2142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2143 "use not simple.\n");
2144 return false;
2147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2148 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2150 if (dump_enabled_p ())
2151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2152 "incompatible vector types.\n");
2153 return false;
2156 *rhs_vectype_out = rhs_vectype;
2157 if (dt == vect_constant_def || dt == vect_external_def)
2158 *vls_type_out = VLS_STORE_INVARIANT;
2159 else
2160 *vls_type_out = VLS_STORE;
2161 return true;
2164 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2165 Note that we support masks with floating-point type, in which case the
2166 floats are interpreted as a bitmask. */
2168 static tree
2169 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2171 if (TREE_CODE (masktype) == INTEGER_TYPE)
2172 return build_int_cst (masktype, -1);
2173 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2175 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2176 mask = build_vector_from_val (masktype, mask);
2177 return vect_init_vector (stmt, mask, masktype, NULL);
2179 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2181 REAL_VALUE_TYPE r;
2182 long tmp[6];
2183 for (int j = 0; j < 6; ++j)
2184 tmp[j] = -1;
2185 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2186 tree mask = build_real (TREE_TYPE (masktype), r);
2187 mask = build_vector_from_val (masktype, mask);
2188 return vect_init_vector (stmt, mask, masktype, NULL);
2190 gcc_unreachable ();
2193 /* Build an all-zero merge value of type VECTYPE while vectorizing
2194 STMT as a gather load. */
2196 static tree
2197 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2199 tree merge;
2200 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2201 merge = build_int_cst (TREE_TYPE (vectype), 0);
2202 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2204 REAL_VALUE_TYPE r;
2205 long tmp[6];
2206 for (int j = 0; j < 6; ++j)
2207 tmp[j] = 0;
2208 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2209 merge = build_real (TREE_TYPE (vectype), r);
2211 else
2212 gcc_unreachable ();
2213 merge = build_vector_from_val (vectype, merge);
2214 return vect_init_vector (stmt, merge, vectype, NULL);
2217 /* Build a gather load call while vectorizing STMT. Insert new instructions
2218 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2219 operation. If the load is conditional, MASK is the unvectorized
2220 condition, otherwise MASK is null. */
2222 static void
2223 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2224 gimple **vec_stmt, gather_scatter_info *gs_info,
2225 tree mask)
2227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2230 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2231 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2232 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2233 edge pe = loop_preheader_edge (loop);
2234 enum { NARROW, NONE, WIDEN } modifier;
2235 poly_uint64 gather_off_nunits
2236 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2238 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2239 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2240 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2241 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2242 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2243 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2244 tree scaletype = TREE_VALUE (arglist);
2245 gcc_checking_assert (types_compatible_p (srctype, rettype)
2246 && (!mask || types_compatible_p (srctype, masktype)));
2248 tree perm_mask = NULL_TREE;
2249 tree mask_perm_mask = NULL_TREE;
2250 if (known_eq (nunits, gather_off_nunits))
2251 modifier = NONE;
2252 else if (known_eq (nunits * 2, gather_off_nunits))
2254 modifier = WIDEN;
2256 /* Currently widening gathers and scatters are only supported for
2257 fixed-length vectors. */
2258 int count = gather_off_nunits.to_constant ();
2259 vec_perm_builder sel (count, count, 1);
2260 for (int i = 0; i < count; ++i)
2261 sel.quick_push (i | (count / 2));
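/* For example, with count == 8 this builds the selector
   { 4, 5, 6, 7, 4, 5, 6, 7 }, which replicates the high half of the
   offset vector so that the second data vector of each pair uses the
   upper offsets.  */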
2263 vec_perm_indices indices (sel, 1, count);
2264 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2265 indices);
2267 else if (known_eq (nunits, gather_off_nunits * 2))
2269 modifier = NARROW;
2271 /* Currently narrowing gathers and scatters are only supported for
2272 fixed-length vectors. */
2273 int count = nunits.to_constant ();
2274 vec_perm_builder sel (count, count, 1);
2275 sel.quick_grow (count);
2276 for (int i = 0; i < count; ++i)
2277 sel[i] = i < count / 2 ? i : i + count / 2;
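/* For example, with count == 8 this builds the two-input selector
   { 0, 1, 2, 3, 8, 9, 10, 11 }, i.e. the low half of the first input
   followed by the low half of the second; it is used below to combine
   each pair of narrow gather results.  */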
2278 vec_perm_indices indices (sel, 2, count);
2279 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2281 ncopies *= 2;
2283 if (mask)
2285 for (int i = 0; i < count; ++i)
2286 sel[i] = i | (count / 2);
2287 indices.new_vector (sel, 2, count);
2288 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2291 else
2292 gcc_unreachable ();
2294 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2295 vectype);
2297 tree ptr = fold_convert (ptrtype, gs_info->base);
2298 if (!is_gimple_min_invariant (ptr))
2300 gimple_seq seq;
2301 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2302 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2303 gcc_assert (!new_bb);
2306 tree scale = build_int_cst (scaletype, gs_info->scale);
2308 tree vec_oprnd0 = NULL_TREE;
2309 tree vec_mask = NULL_TREE;
2310 tree src_op = NULL_TREE;
2311 tree mask_op = NULL_TREE;
2312 tree prev_res = NULL_TREE;
2313 stmt_vec_info prev_stmt_info = NULL;
2315 if (!mask)
2317 src_op = vect_build_zero_merge_argument (stmt, rettype);
2318 mask_op = vect_build_all_ones_mask (stmt, masktype);
2321 for (int j = 0; j < ncopies; ++j)
2323 tree op, var;
2324 gimple *new_stmt;
2325 if (modifier == WIDEN && (j & 1))
2326 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2327 perm_mask, stmt, gsi);
2328 else if (j == 0)
2329 op = vec_oprnd0
2330 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2331 else
2332 op = vec_oprnd0
2333 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2335 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2337 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2338 TYPE_VECTOR_SUBPARTS (idxtype)));
2339 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2340 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2341 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2343 op = var;
2346 if (mask)
2348 if (mask_perm_mask && (j & 1))
2349 mask_op = permute_vec_elements (mask_op, mask_op,
2350 mask_perm_mask, stmt, gsi);
2351 else
2353 if (j == 0)
2354 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2355 else
2357 gimple *def_stmt;
2358 enum vect_def_type dt;
2359 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2360 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2363 mask_op = vec_mask;
2364 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2366 gcc_assert
2367 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2368 TYPE_VECTOR_SUBPARTS (masktype)));
2369 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2370 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2371 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2372 mask_op);
2373 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2374 mask_op = var;
2377 src_op = mask_op;
2380 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2381 mask_op, scale);
2383 if (!useless_type_conversion_p (vectype, rettype))
2385 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2386 TYPE_VECTOR_SUBPARTS (rettype)));
2387 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2388 gimple_call_set_lhs (new_stmt, op);
2389 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2390 var = make_ssa_name (vec_dest);
2391 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2392 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2394 else
2396 var = make_ssa_name (vec_dest, new_stmt);
2397 gimple_call_set_lhs (new_stmt, var);
2400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2402 if (modifier == NARROW)
2404 if ((j & 1) == 0)
2406 prev_res = var;
2407 continue;
2409 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2410 new_stmt = SSA_NAME_DEF_STMT (var);
2413 if (prev_stmt_info == NULL)
2414 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2415 else
2416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2417 prev_stmt_info = vinfo_for_stmt (new_stmt);
2421 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2423 static bool
2424 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2425 gimple **vec_stmt, slp_tree slp_node,
2426 tree vectype_in, enum vect_def_type *dt)
2428 tree op, vectype;
2429 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2430 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2431 unsigned ncopies;
2432 unsigned HOST_WIDE_INT nunits, num_bytes;
2434 op = gimple_call_arg (stmt, 0);
2435 vectype = STMT_VINFO_VECTYPE (stmt_info);
2437 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2438 return false;
2440 /* Multiple types in SLP are handled by creating the appropriate number of
2441 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2442 case of SLP. */
2443 if (slp_node)
2444 ncopies = 1;
2445 else
2446 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2448 gcc_assert (ncopies >= 1);
2450 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2451 if (! char_vectype)
2452 return false;
2454 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2455 return false;
2457 unsigned word_bytes = num_bytes / nunits;
2459 /* The encoding uses one stepped pattern for each byte in the word. */
2460 vec_perm_builder elts (num_bytes, word_bytes, 3);
2461 for (unsigned i = 0; i < 3; ++i)
2462 for (unsigned j = 0; j < word_bytes; ++j)
2463 elts.quick_push ((i + 1) * word_bytes - j - 1);
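/* For example, for a bswap32 on a 16-byte vector, word_bytes == 4 and
   the selector expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 }, reversing the bytes within each 32-bit word.  */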
2465 vec_perm_indices indices (elts, 1, num_bytes);
2466 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2467 return false;
2469 if (! vec_stmt)
2471 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2474 "\n");
2475 if (! PURE_SLP_STMT (stmt_info))
2477 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2478 1, vector_stmt, stmt_info, 0, vect_prologue);
2479 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2480 ncopies, vec_perm, stmt_info, 0, vect_body);
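/* I.e. one vector statement in the prologue (presumably to account for
   setting up the permutation constant) and one VEC_PERM_EXPR per copy
   in the loop body.  */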
2482 return true;
2485 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2487 /* Transform. */
2488 vec<tree> vec_oprnds = vNULL;
2489 gimple *new_stmt = NULL;
2490 stmt_vec_info prev_stmt_info = NULL;
2491 for (unsigned j = 0; j < ncopies; j++)
2493 /* Handle uses. */
2494 if (j == 0)
2495 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2496 else
2497 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2499 /* Arguments are ready.  Create the new vector stmt. */
2500 unsigned i;
2501 tree vop;
2502 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2504 tree tem = make_ssa_name (char_vectype);
2505 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2506 char_vectype, vop));
2507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2508 tree tem2 = make_ssa_name (char_vectype);
2509 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2510 tem, tem, bswap_vconst);
2511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2512 tem = make_ssa_name (vectype);
2513 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2514 vectype, tem2));
2515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2516 if (slp_node)
2517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2520 if (slp_node)
2521 continue;
2523 if (j == 0)
2524 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2525 else
2526 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2528 prev_stmt_info = vinfo_for_stmt (new_stmt);
2531 vec_oprnds.release ();
2532 return true;
2535 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2536 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2537 in a single step. On success, store the binary pack code in
2538 *CONVERT_CODE. */
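/* For example, two V2DI inputs can be narrowed to one V4SI output with
   a single pack operation, whereas a narrowing that would need an
   intermediate type (multiple steps) is rejected.  */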
2540 static bool
2541 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2542 tree_code *convert_code)
2544 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2545 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2546 return false;
2548 tree_code code;
2549 int multi_step_cvt = 0;
2550 auto_vec <tree, 8> interm_types;
2551 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2552 &code, &multi_step_cvt,
2553 &interm_types)
2554 || multi_step_cvt)
2555 return false;
2557 *convert_code = code;
2558 return true;
2561 /* Function vectorizable_call.
2563 Check if GS performs a function call that can be vectorized.
2564 If VEC_STMT is also passed, vectorize GS: create a vectorized
2565 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2566 Return FALSE if GS is not a vectorizable stmt, TRUE otherwise. */
2568 static bool
2569 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2570 slp_tree slp_node)
2572 gcall *stmt;
2573 tree vec_dest;
2574 tree scalar_dest;
2575 tree op, type;
2576 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2577 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2578 tree vectype_out, vectype_in;
2579 poly_uint64 nunits_in;
2580 poly_uint64 nunits_out;
2581 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2582 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2583 vec_info *vinfo = stmt_info->vinfo;
2584 tree fndecl, new_temp, rhs_type;
2585 gimple *def_stmt;
2586 enum vect_def_type dt[3]
2587 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2588 int ndts = 3;
2589 gimple *new_stmt = NULL;
2590 int ncopies, j;
2591 vec<tree> vargs = vNULL;
2592 enum { NARROW, NONE, WIDEN } modifier;
2593 size_t i, nargs;
2594 tree lhs;
2596 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2597 return false;
2599 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2600 && ! vec_stmt)
2601 return false;
2603 /* Is GS a vectorizable call? */
2604 stmt = dyn_cast <gcall *> (gs);
2605 if (!stmt)
2606 return false;
2608 if (gimple_call_internal_p (stmt)
2609 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2610 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2611 /* Handled by vectorizable_load and vectorizable_store. */
2612 return false;
2614 if (gimple_call_lhs (stmt) == NULL_TREE
2615 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2616 return false;
2618 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2620 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2622 /* Process function arguments. */
2623 rhs_type = NULL_TREE;
2624 vectype_in = NULL_TREE;
2625 nargs = gimple_call_num_args (stmt);
2627 /* Bail out if the function has more than three arguments; we do not have
2628 interesting builtin functions to vectorize with more than two arguments,
2629 except for fma. Calls with no arguments are not handled either. */
2630 if (nargs == 0 || nargs > 3)
2631 return false;
2633 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2634 if (gimple_call_internal_p (stmt)
2635 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2637 nargs = 0;
2638 rhs_type = unsigned_type_node;
2641 for (i = 0; i < nargs; i++)
2643 tree opvectype;
2645 op = gimple_call_arg (stmt, i);
2647 /* We can only handle calls with arguments of the same type. */
2648 if (rhs_type
2649 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2651 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2653 "argument types differ.\n");
2654 return false;
2656 if (!rhs_type)
2657 rhs_type = TREE_TYPE (op);
2659 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2661 if (dump_enabled_p ())
2662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2663 "use not simple.\n");
2664 return false;
2667 if (!vectype_in)
2668 vectype_in = opvectype;
2669 else if (opvectype
2670 && opvectype != vectype_in)
2672 if (dump_enabled_p ())
2673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2674 "argument vector types differ.\n");
2675 return false;
2678 /* If all arguments are external or constant defs, use a vector type with
2679 the same size as the output vector type. */
2680 if (!vectype_in)
2681 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2682 if (vec_stmt)
2683 gcc_assert (vectype_in);
2684 if (!vectype_in)
2686 if (dump_enabled_p ())
2688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2689 "no vectype for scalar type ");
2690 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2691 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2694 return false;
2697 /* FORNOW */
2698 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2699 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2700 if (known_eq (nunits_in * 2, nunits_out))
2701 modifier = NARROW;
2702 else if (known_eq (nunits_out, nunits_in))
2703 modifier = NONE;
2704 else if (known_eq (nunits_out * 2, nunits_in))
2705 modifier = WIDEN;
2706 else
2707 return false;
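/* For example, a call taking V2DF arguments and producing a V4SI result
   (twice as many, narrower elements) is classified as NARROW; the
   inverse ratio is WIDEN.  */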
2709 /* We only handle functions that do not read or clobber memory. */
2710 if (gimple_vuse (stmt))
2712 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2714 "function reads from or writes to memory.\n");
2715 return false;
2718 /* For now, we only vectorize functions if a target-specific builtin
2719 is available. TODO: in some cases, it might be profitable to
2720 insert the calls for pieces of the vector, in order to be able
2721 to vectorize other operations in the loop. */
2722 fndecl = NULL_TREE;
2723 internal_fn ifn = IFN_LAST;
2724 combined_fn cfn = gimple_call_combined_fn (stmt);
2725 tree callee = gimple_call_fndecl (stmt);
2727 /* First try using an internal function. */
2728 tree_code convert_code = ERROR_MARK;
2729 if (cfn != CFN_LAST
2730 && (modifier == NONE
2731 || (modifier == NARROW
2732 && simple_integer_narrowing (vectype_out, vectype_in,
2733 &convert_code))))
2734 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2735 vectype_in);
2737 /* If that fails, try asking for a target-specific built-in function. */
2738 if (ifn == IFN_LAST)
2740 if (cfn != CFN_LAST)
2741 fndecl = targetm.vectorize.builtin_vectorized_function
2742 (cfn, vectype_out, vectype_in);
2743 else
2744 fndecl = targetm.vectorize.builtin_md_vectorized_function
2745 (callee, vectype_out, vectype_in);
2748 if (ifn == IFN_LAST && !fndecl)
2750 if (cfn == CFN_GOMP_SIMD_LANE
2751 && !slp_node
2752 && loop_vinfo
2753 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2754 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2755 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2756 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2758 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2759 { 0, 1, 2, ... vf - 1 } vector. */
2760 gcc_assert (nargs == 0);
2762 else if (modifier == NONE
2763 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2764 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2765 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2766 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2767 vectype_in, dt);
2768 else
2770 if (dump_enabled_p ())
2771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2772 "function is not vectorizable.\n");
2773 return false;
2777 if (slp_node)
2778 ncopies = 1;
2779 else if (modifier == NARROW && ifn == IFN_LAST)
2780 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2781 else
2782 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2784 /* Sanity check: make sure that at least one copy of the vectorized stmt
2785 needs to be generated. */
2786 gcc_assert (ncopies >= 1);
2788 if (!vec_stmt) /* transformation not required. */
2790 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2791 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2793 "\n");
2794 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2795 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2796 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2797 vec_promote_demote, stmt_info, 0, vect_body);
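/* For the narrowing path each pair of half-width results is combined
   with one pack statement, hence the extra NCOPIES / 2 statements
   costed above.  */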
2799 return true;
2802 /* Transform. */
2804 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2807 /* Handle def. */
2808 scalar_dest = gimple_call_lhs (stmt);
2809 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2811 prev_stmt_info = NULL;
2812 if (modifier == NONE || ifn != IFN_LAST)
2814 tree prev_res = NULL_TREE;
2815 for (j = 0; j < ncopies; ++j)
2817 /* Build argument list for the vectorized call. */
2818 if (j == 0)
2819 vargs.create (nargs);
2820 else
2821 vargs.truncate (0);
2823 if (slp_node)
2825 auto_vec<vec<tree> > vec_defs (nargs);
2826 vec<tree> vec_oprnds0;
2828 for (i = 0; i < nargs; i++)
2829 vargs.quick_push (gimple_call_arg (stmt, i));
2830 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2831 vec_oprnds0 = vec_defs[0];
2833 /* Arguments are ready. Create the new vector stmt. */
2834 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2836 size_t k;
2837 for (k = 0; k < nargs; k++)
2839 vec<tree> vec_oprndsk = vec_defs[k];
2840 vargs[k] = vec_oprndsk[i];
2842 if (modifier == NARROW)
2844 tree half_res = make_ssa_name (vectype_in);
2845 gcall *call
2846 = gimple_build_call_internal_vec (ifn, vargs);
2847 gimple_call_set_lhs (call, half_res);
2848 gimple_call_set_nothrow (call, true);
2849 new_stmt = call;
2850 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2851 if ((i & 1) == 0)
2853 prev_res = half_res;
2854 continue;
2856 new_temp = make_ssa_name (vec_dest);
2857 new_stmt = gimple_build_assign (new_temp, convert_code,
2858 prev_res, half_res);
2860 else
2862 gcall *call;
2863 if (ifn != IFN_LAST)
2864 call = gimple_build_call_internal_vec (ifn, vargs);
2865 else
2866 call = gimple_build_call_vec (fndecl, vargs);
2867 new_temp = make_ssa_name (vec_dest, call);
2868 gimple_call_set_lhs (call, new_temp);
2869 gimple_call_set_nothrow (call, true);
2870 new_stmt = call;
2872 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2873 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2876 for (i = 0; i < nargs; i++)
2878 vec<tree> vec_oprndsi = vec_defs[i];
2879 vec_oprndsi.release ();
2881 continue;
2884 for (i = 0; i < nargs; i++)
2886 op = gimple_call_arg (stmt, i);
2887 if (j == 0)
2888 vec_oprnd0
2889 = vect_get_vec_def_for_operand (op, stmt);
2890 else
2892 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2893 vec_oprnd0
2894 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2897 vargs.quick_push (vec_oprnd0);
2900 if (gimple_call_internal_p (stmt)
2901 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2903 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
2904 tree new_var
2905 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2906 gimple *init_stmt = gimple_build_assign (new_var, cst);
2907 vect_init_vector_1 (stmt, init_stmt, NULL);
2908 new_temp = make_ssa_name (vec_dest);
2909 new_stmt = gimple_build_assign (new_temp, new_var);
2911 else if (modifier == NARROW)
2913 tree half_res = make_ssa_name (vectype_in);
2914 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2915 gimple_call_set_lhs (call, half_res);
2916 gimple_call_set_nothrow (call, true);
2917 new_stmt = call;
2918 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2919 if ((j & 1) == 0)
2921 prev_res = half_res;
2922 continue;
2924 new_temp = make_ssa_name (vec_dest);
2925 new_stmt = gimple_build_assign (new_temp, convert_code,
2926 prev_res, half_res);
2928 else
2930 gcall *call;
2931 if (ifn != IFN_LAST)
2932 call = gimple_build_call_internal_vec (ifn, vargs);
2933 else
2934 call = gimple_build_call_vec (fndecl, vargs);
2935 new_temp = make_ssa_name (vec_dest, new_stmt);
2936 gimple_call_set_lhs (call, new_temp);
2937 gimple_call_set_nothrow (call, true);
2938 new_stmt = call;
2940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2942 if (j == (modifier == NARROW ? 1 : 0))
2943 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2944 else
2945 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2947 prev_stmt_info = vinfo_for_stmt (new_stmt);
2950 else if (modifier == NARROW)
2952 for (j = 0; j < ncopies; ++j)
2954 /* Build argument list for the vectorized call. */
2955 if (j == 0)
2956 vargs.create (nargs * 2);
2957 else
2958 vargs.truncate (0);
2960 if (slp_node)
2962 auto_vec<vec<tree> > vec_defs (nargs);
2963 vec<tree> vec_oprnds0;
2965 for (i = 0; i < nargs; i++)
2966 vargs.quick_push (gimple_call_arg (stmt, i));
2967 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2968 vec_oprnds0 = vec_defs[0];
2970 /* Arguments are ready. Create the new vector stmt. */
2971 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2973 size_t k;
2974 vargs.truncate (0);
2975 for (k = 0; k < nargs; k++)
2977 vec<tree> vec_oprndsk = vec_defs[k];
2978 vargs.quick_push (vec_oprndsk[i]);
2979 vargs.quick_push (vec_oprndsk[i + 1]);
2981 gcall *call;
2982 if (ifn != IFN_LAST)
2983 call = gimple_build_call_internal_vec (ifn, vargs);
2984 else
2985 call = gimple_build_call_vec (fndecl, vargs);
2986 new_temp = make_ssa_name (vec_dest, call);
2987 gimple_call_set_lhs (call, new_temp);
2988 gimple_call_set_nothrow (call, true);
2989 new_stmt = call;
2990 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2991 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2994 for (i = 0; i < nargs; i++)
2996 vec<tree> vec_oprndsi = vec_defs[i];
2997 vec_oprndsi.release ();
2999 continue;
3002 for (i = 0; i < nargs; i++)
3004 op = gimple_call_arg (stmt, i);
3005 if (j == 0)
3007 vec_oprnd0
3008 = vect_get_vec_def_for_operand (op, stmt);
3009 vec_oprnd1
3010 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3012 else
3014 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3015 vec_oprnd0
3016 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3017 vec_oprnd1
3018 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3021 vargs.quick_push (vec_oprnd0);
3022 vargs.quick_push (vec_oprnd1);
3025 new_stmt = gimple_build_call_vec (fndecl, vargs);
3026 new_temp = make_ssa_name (vec_dest, new_stmt);
3027 gimple_call_set_lhs (new_stmt, new_temp);
3028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3030 if (j == 0)
3031 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3032 else
3033 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3035 prev_stmt_info = vinfo_for_stmt (new_stmt);
3038 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3040 else
3041 /* No current target implements this case. */
3042 return false;
3044 vargs.release ();
3046 /* The call in STMT might prevent it from being removed in DCE.
3047 We cannot remove it here, however, because of the way the SSA name
3048 it defines is mapped to the new definition. So just replace the
3049 rhs of the statement with something harmless. */
3051 if (slp_node)
3052 return true;
3054 type = TREE_TYPE (scalar_dest);
3055 if (is_pattern_stmt_p (stmt_info))
3056 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3057 else
3058 lhs = gimple_call_lhs (stmt);
3060 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3061 set_vinfo_for_stmt (new_stmt, stmt_info);
3062 set_vinfo_for_stmt (stmt, NULL);
3063 STMT_VINFO_STMT (stmt_info) = new_stmt;
3064 gsi_replace (gsi, new_stmt, false);
3066 return true;
3070 struct simd_call_arg_info
3072 tree vectype;
3073 tree op;
3074 HOST_WIDE_INT linear_step;
3075 enum vect_def_type dt;
3076 unsigned int align;
3077 bool simd_lane_linear;
3080 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3081 is linear within a simd lane (but not within the whole loop), note it in
3082 *ARGINFO. */
3084 static void
3085 vect_simd_lane_linear (tree op, struct loop *loop,
3086 struct simd_call_arg_info *arginfo)
3088 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3090 if (!is_gimple_assign (def_stmt)
3091 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3092 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3093 return;
3095 tree base = gimple_assign_rhs1 (def_stmt);
3096 HOST_WIDE_INT linear_step = 0;
3097 tree v = gimple_assign_rhs2 (def_stmt);
3098 while (TREE_CODE (v) == SSA_NAME)
3100 tree t;
3101 def_stmt = SSA_NAME_DEF_STMT (v);
3102 if (is_gimple_assign (def_stmt))
3103 switch (gimple_assign_rhs_code (def_stmt))
3105 case PLUS_EXPR:
3106 t = gimple_assign_rhs2 (def_stmt);
3107 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3108 return;
3109 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3110 v = gimple_assign_rhs1 (def_stmt);
3111 continue;
3112 case MULT_EXPR:
3113 t = gimple_assign_rhs2 (def_stmt);
3114 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3115 return;
3116 linear_step = tree_to_shwi (t);
3117 v = gimple_assign_rhs1 (def_stmt);
3118 continue;
3119 CASE_CONVERT:
3120 t = gimple_assign_rhs1 (def_stmt);
3121 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3122 || (TYPE_PRECISION (TREE_TYPE (v))
3123 < TYPE_PRECISION (TREE_TYPE (t))))
3124 return;
3125 if (!linear_step)
3126 linear_step = 1;
3127 v = t;
3128 continue;
3129 default:
3130 return;
3132 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3133 && loop->simduid
3134 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3135 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3136 == loop->simduid))
3138 if (!linear_step)
3139 linear_step = 1;
3140 arginfo->linear_step = linear_step;
3141 arginfo->op = base;
3142 arginfo->simd_lane_linear = true;
3143 return;
3148 /* Return the number of elements in vector type VECTYPE, which is associated
3149 with a SIMD clone. At present these vectors always have a constant
3150 length. */
3152 static unsigned HOST_WIDE_INT
3153 simd_clone_subparts (tree vectype)
3155 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3158 /* Function vectorizable_simd_clone_call.
3160 Check if STMT performs a function call that can be vectorized
3161 by calling a simd clone of the function.
3162 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3163 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3164 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3166 static bool
3167 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3168 gimple **vec_stmt, slp_tree slp_node)
3170 tree vec_dest;
3171 tree scalar_dest;
3172 tree op, type;
3173 tree vec_oprnd0 = NULL_TREE;
3174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3175 tree vectype;
3176 unsigned int nunits;
3177 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3178 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3179 vec_info *vinfo = stmt_info->vinfo;
3180 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3181 tree fndecl, new_temp;
3182 gimple *def_stmt;
3183 gimple *new_stmt = NULL;
3184 int ncopies, j;
3185 auto_vec<simd_call_arg_info> arginfo;
3186 vec<tree> vargs = vNULL;
3187 size_t i, nargs;
3188 tree lhs, rtype, ratype;
3189 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3191 /* Is STMT a vectorizable call? */
3192 if (!is_gimple_call (stmt))
3193 return false;
3195 fndecl = gimple_call_fndecl (stmt);
3196 if (fndecl == NULL_TREE)
3197 return false;
3199 struct cgraph_node *node = cgraph_node::get (fndecl);
3200 if (node == NULL || node->simd_clones == NULL)
3201 return false;
3203 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3204 return false;
3206 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3207 && ! vec_stmt)
3208 return false;
3210 if (gimple_call_lhs (stmt)
3211 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3212 return false;
3214 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3216 vectype = STMT_VINFO_VECTYPE (stmt_info);
3218 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3219 return false;
3221 /* FORNOW */
3222 if (slp_node)
3223 return false;
3225 /* Process function arguments. */
3226 nargs = gimple_call_num_args (stmt);
3228 /* Bail out if the function has zero arguments. */
3229 if (nargs == 0)
3230 return false;
3232 arginfo.reserve (nargs, true);
3234 for (i = 0; i < nargs; i++)
3236 simd_call_arg_info thisarginfo;
3237 affine_iv iv;
3239 thisarginfo.linear_step = 0;
3240 thisarginfo.align = 0;
3241 thisarginfo.op = NULL_TREE;
3242 thisarginfo.simd_lane_linear = false;
3244 op = gimple_call_arg (stmt, i);
3245 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3246 &thisarginfo.vectype)
3247 || thisarginfo.dt == vect_uninitialized_def)
3249 if (dump_enabled_p ())
3250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3251 "use not simple.\n");
3252 return false;
3255 if (thisarginfo.dt == vect_constant_def
3256 || thisarginfo.dt == vect_external_def)
3257 gcc_assert (thisarginfo.vectype == NULL_TREE);
3258 else
3259 gcc_assert (thisarginfo.vectype != NULL_TREE);
3261 /* For linear arguments, the analysis phase should have saved
3262 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3263 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3264 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3266 gcc_assert (vec_stmt);
3267 thisarginfo.linear_step
3268 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3269 thisarginfo.op
3270 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3271 thisarginfo.simd_lane_linear
3272 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3273 == boolean_true_node);
3274 /* If the loop has been peeled for alignment, adjust the recorded base accordingly. */
3275 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3276 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3277 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3279 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3280 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3281 tree opt = TREE_TYPE (thisarginfo.op);
3282 bias = fold_convert (TREE_TYPE (step), bias);
3283 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3284 thisarginfo.op
3285 = fold_build2 (POINTER_TYPE_P (opt)
3286 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3287 thisarginfo.op, bias);
3290 else if (!vec_stmt
3291 && thisarginfo.dt != vect_constant_def
3292 && thisarginfo.dt != vect_external_def
3293 && loop_vinfo
3294 && TREE_CODE (op) == SSA_NAME
3295 && simple_iv (loop, loop_containing_stmt (stmt), op,
3296 &iv, false)
3297 && tree_fits_shwi_p (iv.step))
3299 thisarginfo.linear_step = tree_to_shwi (iv.step);
3300 thisarginfo.op = iv.base;
3302 else if ((thisarginfo.dt == vect_constant_def
3303 || thisarginfo.dt == vect_external_def)
3304 && POINTER_TYPE_P (TREE_TYPE (op)))
3305 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3306 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3307 linear too. */
3308 if (POINTER_TYPE_P (TREE_TYPE (op))
3309 && !thisarginfo.linear_step
3310 && !vec_stmt
3311 && thisarginfo.dt != vect_constant_def
3312 && thisarginfo.dt != vect_external_def
3313 && loop_vinfo
3314 && !slp_node
3315 && TREE_CODE (op) == SSA_NAME)
3316 vect_simd_lane_linear (op, loop, &thisarginfo);
3318 arginfo.quick_push (thisarginfo);
3321 unsigned HOST_WIDE_INT vf;
3322 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3326 "not considering SIMD clones; not yet supported"
3327 " for variable-width vectors.\n");
3328 return false;
3331 unsigned int badness = 0;
3332 struct cgraph_node *bestn = NULL;
3333 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3334 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3335 else
3336 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3337 n = n->simdclone->next_clone)
3339 unsigned int this_badness = 0;
3340 if (n->simdclone->simdlen > vf
3341 || n->simdclone->nargs != nargs)
3342 continue;
3343 if (n->simdclone->simdlen < vf)
3344 this_badness += (exact_log2 (vf)
3345 - exact_log2 (n->simdclone->simdlen)) * 1024;
3346 if (n->simdclone->inbranch)
3347 this_badness += 2048;
3348 int target_badness = targetm.simd_clone.usable (n);
3349 if (target_badness < 0)
3350 continue;
3351 this_badness += target_badness * 512;
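/* Heuristic weighting so far: each halving of the clone's simdlen
   relative to the vectorization factor costs 1024, an in-branch clone
   costs 2048 and target-reported badness is scaled by 512; below, each
   VECTOR argument whose operand is constant, external or linear adds
   a further 64.  */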
3352 /* FORNOW: Have to add code to add the mask argument. */
3353 if (n->simdclone->inbranch)
3354 continue;
3355 for (i = 0; i < nargs; i++)
3357 switch (n->simdclone->args[i].arg_type)
3359 case SIMD_CLONE_ARG_TYPE_VECTOR:
3360 if (!useless_type_conversion_p
3361 (n->simdclone->args[i].orig_type,
3362 TREE_TYPE (gimple_call_arg (stmt, i))))
3363 i = -1;
3364 else if (arginfo[i].dt == vect_constant_def
3365 || arginfo[i].dt == vect_external_def
3366 || arginfo[i].linear_step)
3367 this_badness += 64;
3368 break;
3369 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3370 if (arginfo[i].dt != vect_constant_def
3371 && arginfo[i].dt != vect_external_def)
3372 i = -1;
3373 break;
3374 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3375 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3376 if (arginfo[i].dt == vect_constant_def
3377 || arginfo[i].dt == vect_external_def
3378 || (arginfo[i].linear_step
3379 != n->simdclone->args[i].linear_step))
3380 i = -1;
3381 break;
3382 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3383 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3384 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3385 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3386 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3387 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3388 /* FORNOW */
3389 i = -1;
3390 break;
3391 case SIMD_CLONE_ARG_TYPE_MASK:
3392 gcc_unreachable ();
3394 if (i == (size_t) -1)
3395 break;
3396 if (n->simdclone->args[i].alignment > arginfo[i].align)
3398 i = -1;
3399 break;
3401 if (arginfo[i].align)
3402 this_badness += (exact_log2 (arginfo[i].align)
3403 - exact_log2 (n->simdclone->args[i].alignment));
3405 if (i == (size_t) -1)
3406 continue;
3407 if (bestn == NULL || this_badness < badness)
3409 bestn = n;
3410 badness = this_badness;
3414 if (bestn == NULL)
3415 return false;
3417 for (i = 0; i < nargs; i++)
3418 if ((arginfo[i].dt == vect_constant_def
3419 || arginfo[i].dt == vect_external_def)
3420 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3422 arginfo[i].vectype
3423 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3424 i)));
3425 if (arginfo[i].vectype == NULL
3426 || (simd_clone_subparts (arginfo[i].vectype)
3427 > bestn->simdclone->simdlen))
3428 return false;
3431 fndecl = bestn->decl;
3432 nunits = bestn->simdclone->simdlen;
3433 ncopies = vf / nunits;
3435 /* If the function isn't const, only allow it in simd loops where the
3436 user has asserted that at least nunits consecutive iterations can be
3437 performed using SIMD instructions. */
3438 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3439 && gimple_vuse (stmt))
3440 return false;
3442 /* Sanity check: make sure that at least one copy of the vectorized stmt
3443 needs to be generated. */
3444 gcc_assert (ncopies >= 1);
3446 if (!vec_stmt) /* transformation not required. */
3448 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3449 for (i = 0; i < nargs; i++)
3450 if ((bestn->simdclone->args[i].arg_type
3451 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3452 || (bestn->simdclone->args[i].arg_type
3453 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3455 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3456 + 1);
3457 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3458 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3459 ? size_type_node : TREE_TYPE (arginfo[i].op);
3460 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3461 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3462 tree sll = arginfo[i].simd_lane_linear
3463 ? boolean_true_node : boolean_false_node;
3464 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3466 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3467 if (dump_enabled_p ())
3468 dump_printf_loc (MSG_NOTE, vect_location,
3469 "=== vectorizable_simd_clone_call ===\n");
3470 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3471 return true;
3474 /* Transform. */
3476 if (dump_enabled_p ())
3477 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3479 /* Handle def. */
3480 scalar_dest = gimple_call_lhs (stmt);
3481 vec_dest = NULL_TREE;
3482 rtype = NULL_TREE;
3483 ratype = NULL_TREE;
3484 if (scalar_dest)
3486 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3487 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3488 if (TREE_CODE (rtype) == ARRAY_TYPE)
3490 ratype = rtype;
3491 rtype = TREE_TYPE (ratype);
3495 prev_stmt_info = NULL;
3496 for (j = 0; j < ncopies; ++j)
3498 /* Build argument list for the vectorized call. */
3499 if (j == 0)
3500 vargs.create (nargs);
3501 else
3502 vargs.truncate (0);
3504 for (i = 0; i < nargs; i++)
3506 unsigned int k, l, m, o;
3507 tree atype;
3508 op = gimple_call_arg (stmt, i);
3509 switch (bestn->simdclone->args[i].arg_type)
3511 case SIMD_CLONE_ARG_TYPE_VECTOR:
3512 atype = bestn->simdclone->args[i].vector_type;
3513 o = nunits / simd_clone_subparts (atype);
3514 for (m = j * o; m < (j + 1) * o; m++)
3516 if (simd_clone_subparts (atype)
3517 < simd_clone_subparts (arginfo[i].vectype))
3519 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3520 k = (simd_clone_subparts (arginfo[i].vectype)
3521 / simd_clone_subparts (atype));
3522 gcc_assert ((k & (k - 1)) == 0);
3523 if (m == 0)
3524 vec_oprnd0
3525 = vect_get_vec_def_for_operand (op, stmt);
3526 else
3528 vec_oprnd0 = arginfo[i].op;
3529 if ((m & (k - 1)) == 0)
3530 vec_oprnd0
3531 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3532 vec_oprnd0);
3534 arginfo[i].op = vec_oprnd0;
3535 vec_oprnd0
3536 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3537 bitsize_int (prec),
3538 bitsize_int ((m & (k - 1)) * prec));
3539 new_stmt
3540 = gimple_build_assign (make_ssa_name (atype),
3541 vec_oprnd0);
3542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3543 vargs.safe_push (gimple_assign_lhs (new_stmt));
3545 else
3547 k = (simd_clone_subparts (atype)
3548 / simd_clone_subparts (arginfo[i].vectype));
3549 gcc_assert ((k & (k - 1)) == 0);
3550 vec<constructor_elt, va_gc> *ctor_elts;
3551 if (k != 1)
3552 vec_alloc (ctor_elts, k);
3553 else
3554 ctor_elts = NULL;
3555 for (l = 0; l < k; l++)
3557 if (m == 0 && l == 0)
3558 vec_oprnd0
3559 = vect_get_vec_def_for_operand (op, stmt);
3560 else
3561 vec_oprnd0
3562 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3563 arginfo[i].op);
3564 arginfo[i].op = vec_oprnd0;
3565 if (k == 1)
3566 break;
3567 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3568 vec_oprnd0);
3570 if (k == 1)
3571 vargs.safe_push (vec_oprnd0);
3572 else
3574 vec_oprnd0 = build_constructor (atype, ctor_elts);
3575 new_stmt
3576 = gimple_build_assign (make_ssa_name (atype),
3577 vec_oprnd0);
3578 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3579 vargs.safe_push (gimple_assign_lhs (new_stmt));
3583 break;
3584 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3585 vargs.safe_push (op);
3586 break;
3587 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3588 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3589 if (j == 0)
3591 gimple_seq stmts;
3592 arginfo[i].op
3593 = force_gimple_operand (arginfo[i].op, &stmts, true,
3594 NULL_TREE);
3595 if (stmts != NULL)
3597 basic_block new_bb;
3598 edge pe = loop_preheader_edge (loop);
3599 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3600 gcc_assert (!new_bb);
3602 if (arginfo[i].simd_lane_linear)
3604 vargs.safe_push (arginfo[i].op);
3605 break;
3607 tree phi_res = copy_ssa_name (op);
3608 gphi *new_phi = create_phi_node (phi_res, loop->header);
3609 set_vinfo_for_stmt (new_phi,
3610 new_stmt_vec_info (new_phi, loop_vinfo));
3611 add_phi_arg (new_phi, arginfo[i].op,
3612 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3613 enum tree_code code
3614 = POINTER_TYPE_P (TREE_TYPE (op))
3615 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3616 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3617 ? sizetype : TREE_TYPE (op);
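/* The linear argument advances by linear_step for every scalar
   iteration, i.e. by linear_step * ncopies * nunits for each iteration
   of the vectorized loop; that is the increment applied on the latch
   edge below.  */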
3618 widest_int cst
3619 = wi::mul (bestn->simdclone->args[i].linear_step,
3620 ncopies * nunits);
3621 tree tcst = wide_int_to_tree (type, cst);
3622 tree phi_arg = copy_ssa_name (op);
3623 new_stmt
3624 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3625 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3626 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3627 set_vinfo_for_stmt (new_stmt,
3628 new_stmt_vec_info (new_stmt, loop_vinfo));
3629 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3630 UNKNOWN_LOCATION);
3631 arginfo[i].op = phi_res;
3632 vargs.safe_push (phi_res);
3634 else
3636 enum tree_code code
3637 = POINTER_TYPE_P (TREE_TYPE (op))
3638 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3639 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3640 ? sizetype : TREE_TYPE (op);
3641 widest_int cst
3642 = wi::mul (bestn->simdclone->args[i].linear_step,
3643 j * nunits);
3644 tree tcst = wide_int_to_tree (type, cst);
3645 new_temp = make_ssa_name (TREE_TYPE (op));
3646 new_stmt = gimple_build_assign (new_temp, code,
3647 arginfo[i].op, tcst);
3648 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3649 vargs.safe_push (new_temp);
3651 break;
3652 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3653 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3654 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3655 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3656 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3657 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3658 default:
3659 gcc_unreachable ();
3663 new_stmt = gimple_build_call_vec (fndecl, vargs);
3664 if (vec_dest)
3666 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
3667 if (ratype)
3668 new_temp = create_tmp_var (ratype);
3669 else if (simd_clone_subparts (vectype)
3670 == simd_clone_subparts (rtype))
3671 new_temp = make_ssa_name (vec_dest, new_stmt);
3672 else
3673 new_temp = make_ssa_name (rtype, new_stmt);
3674 gimple_call_set_lhs (new_stmt, new_temp);
3676 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3678 if (vec_dest)
3680 if (simd_clone_subparts (vectype) < nunits)
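	      /* Each call of the clone computes NUNITS elements, more than
		 fit into one VECTYPE vector: split the return value into
		 K = NUNITS / subparts (VECTYPE) pieces, reading them with
		 MEM_REFs from the array return value when RATYPE is set and
		 with BIT_FIELD_REFs otherwise, and chain the pieces as the
		 vectorized statements.  */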
3682 unsigned int k, l;
3683 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3684 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
3685 k = nunits / simd_clone_subparts (vectype);
3686 gcc_assert ((k & (k - 1)) == 0);
3687 for (l = 0; l < k; l++)
3689 tree t;
3690 if (ratype)
3692 t = build_fold_addr_expr (new_temp);
3693 t = build2 (MEM_REF, vectype, t,
3694 build_int_cst (TREE_TYPE (t), l * bytes));
3696 else
3697 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3698 bitsize_int (prec), bitsize_int (l * prec));
3699 new_stmt
3700 = gimple_build_assign (make_ssa_name (vectype), t);
3701 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3702 if (j == 0 && l == 0)
3703 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3704 else
3705 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3707 prev_stmt_info = vinfo_for_stmt (new_stmt);
3710 if (ratype)
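	      /* The array return value is dead now; clobber it so its
		 storage can be reused.  */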
3712 tree clobber = build_constructor (ratype, NULL);
3713 TREE_THIS_VOLATILE (clobber) = 1;
3714 new_stmt = gimple_build_assign (new_temp, clobber);
3715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3717 continue;
3719 else if (simd_clone_subparts (vectype) > nunits)
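	      /* Here each call produces only part of one VECTYPE vector:
		 accumulate K = subparts (VECTYPE) / subparts (RTYPE) partial
		 results in RET_CTOR_ELTS (reading them out of the array
		 return value when RATYPE is set) and build the full vector
		 with a CONSTRUCTOR once the last piece of the group
		 (J % K == K - 1) has been produced.  */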
3721 unsigned int k = (simd_clone_subparts (vectype)
3722 / simd_clone_subparts (rtype));
3723 gcc_assert ((k & (k - 1)) == 0);
3724 if ((j & (k - 1)) == 0)
3725 vec_alloc (ret_ctor_elts, k);
3726 if (ratype)
3728 unsigned int m, o = nunits / simd_clone_subparts (rtype);
3729 for (m = 0; m < o; m++)
3731 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3732 size_int (m), NULL_TREE, NULL_TREE);
3733 new_stmt
3734 = gimple_build_assign (make_ssa_name (rtype), tem);
3735 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3736 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3737 gimple_assign_lhs (new_stmt));
3739 tree clobber = build_constructor (ratype, NULL);
3740 TREE_THIS_VOLATILE (clobber) = 1;
3741 new_stmt = gimple_build_assign (new_temp, clobber);
3742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3744 else
3745 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3746 if ((j & (k - 1)) != k - 1)
3747 continue;
3748 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3749 new_stmt
3750 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3751 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3753 if ((unsigned) j == k - 1)
3754 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3755 else
3756 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3758 prev_stmt_info = vinfo_for_stmt (new_stmt);
3759 continue;
3761 else if (ratype)
3763 tree t = build_fold_addr_expr (new_temp);
3764 t = build2 (MEM_REF, vectype, t,
3765 build_int_cst (TREE_TYPE (t), 0));
3766 new_stmt
3767 = gimple_build_assign (make_ssa_name (vec_dest), t);
3768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3769 tree clobber = build_constructor (ratype, NULL);
3770 TREE_THIS_VOLATILE (clobber) = 1;
3771 vect_finish_stmt_generation (stmt,
3772 gimple_build_assign (new_temp,
3773 clobber), gsi);
3777 if (j == 0)
3778 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3779 else
3780 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3782 prev_stmt_info = vinfo_for_stmt (new_stmt);
3785 vargs.release ();
3787 /* The call in STMT might prevent it from being removed in dce.
3788 However, we cannot remove it here, due to the way the SSA name
3789 it defines is mapped to the new definition. So just replace the
3790 rhs of the statement with something harmless. */
3792 if (slp_node)
3793 return true;
3795 if (scalar_dest)
3797 type = TREE_TYPE (scalar_dest);
3798 if (is_pattern_stmt_p (stmt_info))
3799 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3800 else
3801 lhs = gimple_call_lhs (stmt);
3802 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3804 else
3805 new_stmt = gimple_build_nop ();
3806 set_vinfo_for_stmt (new_stmt, stmt_info);
3807 set_vinfo_for_stmt (stmt, NULL);
3808 STMT_VINFO_STMT (stmt_info) = new_stmt;
3809 gsi_replace (gsi, new_stmt, true);
3810 unlink_stmt_vdef (stmt);
3812 return true;
3816 /* Function vect_gen_widened_results_half
3818 Create a vector stmt whose code is CODE, whose number of operands is
3819 OP_TYPE, whose result variable is VEC_DEST, and whose arguments are
3820 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3821 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3822 needs to be created (DECL is a function-decl of a target-builtin).
3823 STMT is the original scalar stmt that we are vectorizing. */
3825 static gimple *
3826 vect_gen_widened_results_half (enum tree_code code,
3827 tree decl,
3828 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3829 tree vec_dest, gimple_stmt_iterator *gsi,
3830 gimple *stmt)
3832 gimple *new_stmt;
3833 tree new_temp;
3835 /* Generate half of the widened result: */
3836 if (code == CALL_EXPR)
3838 /* Target specific support */
3839 if (op_type == binary_op)
3840 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3841 else
3842 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3843 new_temp = make_ssa_name (vec_dest, new_stmt);
3844 gimple_call_set_lhs (new_stmt, new_temp);
3846 else
3848 /* Generic support */
3849 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3850 if (op_type != binary_op)
3851 vec_oprnd1 = NULL;
3852 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3853 new_temp = make_ssa_name (vec_dest, new_stmt);
3854 gimple_assign_set_lhs (new_stmt, new_temp);
3856 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3858 return new_stmt;
3862 /* Get vectorized definitions for loop-based vectorization. For the first
3863 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3864 the scalar operand), and for the rest we get a copy with
3865 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3866 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3867 The vectors are collected into VEC_OPRNDS. */
3869 static void
3870 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3871 vec<tree> *vec_oprnds, int multi_step_cvt)
3873 tree vec_oprnd;
3875 /* Get first vector operand. */
3876 /* All the vector operands except the very first one (which is the scalar
3877 operand) are stmt copies. */
3878 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3879 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3880 else
3881 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3883 vec_oprnds->quick_push (vec_oprnd);
3885 /* Get second vector operand. */
3886 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3887 vec_oprnds->quick_push (vec_oprnd);
3889 *oprnd = vec_oprnd;
3891 /* For conversion in multiple steps, continue to get operands
3892 recursively. */
3893 if (multi_step_cvt)
3894 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3898 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3899 For multi-step conversions store the resulting vectors and call the function
3900 recursively. */
3902 static void
3903 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3904 int multi_step_cvt, gimple *stmt,
3905 vec<tree> vec_dsts,
3906 gimple_stmt_iterator *gsi,
3907 slp_tree slp_node, enum tree_code code,
3908 stmt_vec_info *prev_stmt_info)
3910 unsigned int i;
3911 tree vop0, vop1, new_tmp, vec_dest;
3912 gimple *new_stmt;
3913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3915 vec_dest = vec_dsts.pop ();
3917 for (i = 0; i < vec_oprnds->length (); i += 2)
3919 /* Create demotion operation. */
3920 vop0 = (*vec_oprnds)[i];
3921 vop1 = (*vec_oprnds)[i + 1];
3922 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3923 new_tmp = make_ssa_name (vec_dest, new_stmt);
3924 gimple_assign_set_lhs (new_stmt, new_tmp);
3925 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3927 if (multi_step_cvt)
3928 /* Store the resulting vector for next recursive call. */
3929 (*vec_oprnds)[i/2] = new_tmp;
3930 else
3932 /* This is the last step of the conversion sequence. Store the
3933 vectors in SLP_NODE or in the vector info of the scalar statement
3934 (or in STMT_VINFO_RELATED_STMT chain). */
3935 if (slp_node)
3936 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3937 else
3939 if (!*prev_stmt_info)
3940 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3941 else
3942 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3944 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3949 /* For multi-step demotion operations we first generate demotion operations
3950 from the source type to the intermediate types, and then combine the
3951 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3952 type. */
3953 if (multi_step_cvt)
3955 /* At each level of recursion we have half of the operands we had at the
3956 previous level. */
3957 vec_oprnds->truncate ((i+1)/2);
3958 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3959 stmt, vec_dsts, gsi, slp_node,
3960 VEC_PACK_TRUNC_EXPR,
3961 prev_stmt_info);
3964 vec_dsts.quick_push (vec_dest);
3968 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3969 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3970 the resulting vectors and call the function recursively. */
3972 static void
3973 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3974 vec<tree> *vec_oprnds1,
3975 gimple *stmt, tree vec_dest,
3976 gimple_stmt_iterator *gsi,
3977 enum tree_code code1,
3978 enum tree_code code2, tree decl1,
3979 tree decl2, int op_type)
3981 int i;
3982 tree vop0, vop1, new_tmp1, new_tmp2;
3983 gimple *new_stmt1, *new_stmt2;
3984 vec<tree> vec_tmp = vNULL;
3986 vec_tmp.create (vec_oprnds0->length () * 2);
3987 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3989 if (op_type == binary_op)
3990 vop1 = (*vec_oprnds1)[i];
3991 else
3992 vop1 = NULL_TREE;
3994 /* Generate the two halves of promotion operation. */
3995 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3996 op_type, vec_dest, gsi, stmt);
3997 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3998 op_type, vec_dest, gsi, stmt);
3999 if (is_gimple_call (new_stmt1))
4001 new_tmp1 = gimple_call_lhs (new_stmt1);
4002 new_tmp2 = gimple_call_lhs (new_stmt2);
4004 else
4006 new_tmp1 = gimple_assign_lhs (new_stmt1);
4007 new_tmp2 = gimple_assign_lhs (new_stmt2);
4010 /* Store the results for the next step. */
4011 vec_tmp.quick_push (new_tmp1);
4012 vec_tmp.quick_push (new_tmp2);
4015 vec_oprnds0->release ();
4016 *vec_oprnds0 = vec_tmp;
4020 /* Check if STMT performs a conversion operation that can be vectorized.
4021 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4022 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4023 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4025 static bool
4026 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4027 gimple **vec_stmt, slp_tree slp_node)
4029 tree vec_dest;
4030 tree scalar_dest;
4031 tree op0, op1 = NULL_TREE;
4032 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4033 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4034 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4035 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4036 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4037 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4038 tree new_temp;
4039 gimple *def_stmt;
4040 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4041 int ndts = 2;
4042 gimple *new_stmt = NULL;
4043 stmt_vec_info prev_stmt_info;
4044 poly_uint64 nunits_in;
4045 poly_uint64 nunits_out;
4046 tree vectype_out, vectype_in;
4047 int ncopies, i, j;
4048 tree lhs_type, rhs_type;
4049 enum { NARROW, NONE, WIDEN } modifier;
4050 vec<tree> vec_oprnds0 = vNULL;
4051 vec<tree> vec_oprnds1 = vNULL;
4052 tree vop0;
4053 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4054 vec_info *vinfo = stmt_info->vinfo;
4055 int multi_step_cvt = 0;
4056 vec<tree> interm_types = vNULL;
4057 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4058 int op_type;
4059 unsigned short fltsz;
4061 /* Is STMT a vectorizable conversion? */
4063 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4064 return false;
4066 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4067 && ! vec_stmt)
4068 return false;
4070 if (!is_gimple_assign (stmt))
4071 return false;
4073 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4074 return false;
4076 code = gimple_assign_rhs_code (stmt);
4077 if (!CONVERT_EXPR_CODE_P (code)
4078 && code != FIX_TRUNC_EXPR
4079 && code != FLOAT_EXPR
4080 && code != WIDEN_MULT_EXPR
4081 && code != WIDEN_LSHIFT_EXPR)
4082 return false;
4084 op_type = TREE_CODE_LENGTH (code);
4086 /* Check types of lhs and rhs. */
4087 scalar_dest = gimple_assign_lhs (stmt);
4088 lhs_type = TREE_TYPE (scalar_dest);
4089 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4091 op0 = gimple_assign_rhs1 (stmt);
4092 rhs_type = TREE_TYPE (op0);
4094 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4095 && !((INTEGRAL_TYPE_P (lhs_type)
4096 && INTEGRAL_TYPE_P (rhs_type))
4097 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4098 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4099 return false;
4101 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4102 && ((INTEGRAL_TYPE_P (lhs_type)
4103 && !type_has_mode_precision_p (lhs_type))
4104 || (INTEGRAL_TYPE_P (rhs_type)
4105 && !type_has_mode_precision_p (rhs_type))))
4107 if (dump_enabled_p ())
4108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4109 "type conversion to/from bit-precision unsupported."
4110 "\n");
4111 return false;
4114 /* Check the operands of the operation. */
4115 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4117 if (dump_enabled_p ())
4118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4119 "use not simple.\n");
4120 return false;
4122 if (op_type == binary_op)
4124 bool ok;
4126 op1 = gimple_assign_rhs2 (stmt);
4127 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4128 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4129 OP1. */
4130 if (CONSTANT_CLASS_P (op0))
4131 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4132 else
4133 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4135 if (!ok)
4137 if (dump_enabled_p ())
4138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4139 "use not simple.\n");
4140 return false;
4144 /* If op0 is an external or constant def, use a vector type of
4145 the same size as the output vector type. */
4146 if (!vectype_in)
4147 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4148 if (vec_stmt)
4149 gcc_assert (vectype_in);
4150 if (!vectype_in)
4152 if (dump_enabled_p ())
4154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4155 "no vectype for scalar type ");
4156 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4157 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4160 return false;
4163 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4164 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4166 if (dump_enabled_p ())
4168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4169 "can't convert between boolean and "
4170 "non-boolean vectors ");
4171 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4172 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4175 return false;
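  /* Classify the conversion by comparing element counts: equal counts mean
     a one-to-one (NONE) conversion; an output vector with more elements
     than the input means the elements get narrower (NARROW, a demotion/
     pack); fewer output elements means they get wider (WIDEN, a promotion/
     unpack).  */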
4178 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4179 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4180 if (known_eq (nunits_out, nunits_in))
4181 modifier = NONE;
4182 else if (multiple_p (nunits_out, nunits_in))
4183 modifier = NARROW;
4184 else
4186 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4187 modifier = WIDEN;
4190 /* Multiple types in SLP are handled by creating the appropriate number of
4191 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4192 case of SLP. */
4193 if (slp_node)
4194 ncopies = 1;
4195 else if (modifier == NARROW)
4196 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4197 else
4198 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4200 /* Sanity check: make sure that at least one copy of the vectorized stmt
4201 needs to be generated. */
4202 gcc_assert (ncopies >= 1);
4204 bool found_mode = false;
4205 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4206 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4207 opt_scalar_mode rhs_mode_iter;
4209 /* Supportable by target? */
4210 switch (modifier)
4212 case NONE:
4213 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4214 return false;
4215 if (supportable_convert_operation (code, vectype_out, vectype_in,
4216 &decl1, &code1))
4217 break;
4218 /* FALLTHRU */
4219 unsupported:
4220 if (dump_enabled_p ())
4221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4222 "conversion not supported by target.\n");
4223 return false;
4225 case WIDEN:
4226 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4227 &code1, &code2, &multi_step_cvt,
4228 &interm_types))
4230 /* A binary widening operation can only be supported directly by the
4231 architecture. */
4232 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4233 break;
4236 if (code != FLOAT_EXPR
4237 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4238 goto unsupported;
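      /* Direct widening is not supported.  For an integer-to-float
	 conversion that widens, try splitting it into an integer widening
	 to an intermediate type CVT_TYPE followed by an int-to-float
	 conversion: walk the integer modes from RHS_MODE upwards (doubling
	 each time, up to the float's size) until both halves are
	 supportable.  */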
4240 fltsz = GET_MODE_SIZE (lhs_mode);
4241 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4243 rhs_mode = rhs_mode_iter.require ();
4244 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4245 break;
4247 cvt_type
4248 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4249 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4250 if (cvt_type == NULL_TREE)
4251 goto unsupported;
4253 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4255 if (!supportable_convert_operation (code, vectype_out,
4256 cvt_type, &decl1, &codecvt1))
4257 goto unsupported;
4259 else if (!supportable_widening_operation (code, stmt, vectype_out,
4260 cvt_type, &codecvt1,
4261 &codecvt2, &multi_step_cvt,
4262 &interm_types))
4263 continue;
4264 else
4265 gcc_assert (multi_step_cvt == 0);
4267 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4268 vectype_in, &code1, &code2,
4269 &multi_step_cvt, &interm_types))
4271 found_mode = true;
4272 break;
4276 if (!found_mode)
4277 goto unsupported;
4279 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4280 codecvt2 = ERROR_MARK;
4281 else
4283 multi_step_cvt++;
4284 interm_types.safe_push (cvt_type);
4285 cvt_type = NULL_TREE;
4287 break;
4289 case NARROW:
4290 gcc_assert (op_type == unary_op);
4291 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4292 &code1, &multi_step_cvt,
4293 &interm_types))
4294 break;
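      /* Direct narrowing is not supported.  For a float-to-integer
	 conversion that narrows, try a float-to-int conversion to a
	 same-width integer type CVT_TYPE first, followed by an integer
	 narrowing from CVT_TYPE to VECTYPE_OUT.  */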
4296 if (code != FIX_TRUNC_EXPR
4297 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4298 goto unsupported;
4300 cvt_type
4301 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4302 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4303 if (cvt_type == NULL_TREE)
4304 goto unsupported;
4305 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4306 &decl1, &codecvt1))
4307 goto unsupported;
4308 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4309 &code1, &multi_step_cvt,
4310 &interm_types))
4311 break;
4312 goto unsupported;
4314 default:
4315 gcc_unreachable ();
4318 if (!vec_stmt) /* transformation not required. */
4320 if (dump_enabled_p ())
4321 dump_printf_loc (MSG_NOTE, vect_location,
4322 "=== vectorizable_conversion ===\n");
4323 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4325 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4326 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4328 else if (modifier == NARROW)
4330 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4331 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4333 else
4335 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4336 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4338 interm_types.release ();
4339 return true;
4342 /* Transform. */
4343 if (dump_enabled_p ())
4344 dump_printf_loc (MSG_NOTE, vect_location,
4345 "transform conversion. ncopies = %d.\n", ncopies);
4347 if (op_type == binary_op)
4349 if (CONSTANT_CLASS_P (op0))
4350 op0 = fold_convert (TREE_TYPE (op1), op0);
4351 else if (CONSTANT_CLASS_P (op1))
4352 op1 = fold_convert (TREE_TYPE (op0), op1);
4355 /* In case of multi-step conversion, we first generate conversion operations
4356 to the intermediate types, and then from those types to the final one.
4357 We create vector destinations for the intermediate type (TYPES) received
4358 from supportable_*_operation, and store them in the correct order
4359 for future use in vect_create_vectorized_*_stmts (). */
4360 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4361 vec_dest = vect_create_destination_var (scalar_dest,
4362 (cvt_type && modifier == WIDEN)
4363 ? cvt_type : vectype_out);
4364 vec_dsts.quick_push (vec_dest);
4366 if (multi_step_cvt)
4368 for (i = interm_types.length () - 1;
4369 interm_types.iterate (i, &intermediate_type); i--)
4371 vec_dest = vect_create_destination_var (scalar_dest,
4372 intermediate_type);
4373 vec_dsts.quick_push (vec_dest);
4377 if (cvt_type)
4378 vec_dest = vect_create_destination_var (scalar_dest,
4379 modifier == WIDEN
4380 ? vectype_out : cvt_type);
4382 if (!slp_node)
4384 if (modifier == WIDEN)
4386 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4387 if (op_type == binary_op)
4388 vec_oprnds1.create (1);
4390 else if (modifier == NARROW)
4391 vec_oprnds0.create (
4392 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4394 else if (code == WIDEN_LSHIFT_EXPR)
4395 vec_oprnds1.create (slp_node->vec_stmts_size);
4397 last_oprnd = op0;
4398 prev_stmt_info = NULL;
4399 switch (modifier)
4401 case NONE:
4402 for (j = 0; j < ncopies; j++)
4404 if (j == 0)
4405 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4406 else
4407 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4409 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4411 /* Arguments are ready, create the new vector stmt. */
4412 if (code1 == CALL_EXPR)
4414 new_stmt = gimple_build_call (decl1, 1, vop0);
4415 new_temp = make_ssa_name (vec_dest, new_stmt);
4416 gimple_call_set_lhs (new_stmt, new_temp);
4418 else
4420 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4421 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4422 new_temp = make_ssa_name (vec_dest, new_stmt);
4423 gimple_assign_set_lhs (new_stmt, new_temp);
4426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4427 if (slp_node)
4428 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4429 else
4431 if (!prev_stmt_info)
4432 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4433 else
4434 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4435 prev_stmt_info = vinfo_for_stmt (new_stmt);
4439 break;
4441 case WIDEN:
4442 /* In case the vectorization factor (VF) is bigger than the number
4443 of elements that we can fit in a vectype (nunits), we have to
4444 generate more than one vector stmt; that is, we need to "unroll"
4445 the vector stmt by a factor VF/nunits. */
4446 for (j = 0; j < ncopies; j++)
4448 /* Handle uses. */
4449 if (j == 0)
4451 if (slp_node)
4453 if (code == WIDEN_LSHIFT_EXPR)
4455 unsigned int k;
4457 vec_oprnd1 = op1;
4458 /* Store vec_oprnd1 for every vector stmt to be created
4459 for SLP_NODE. We check during the analysis that all
4460 the shift arguments are the same. */
4461 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4462 vec_oprnds1.quick_push (vec_oprnd1);
4464 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4465 slp_node);
4467 else
4468 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4469 &vec_oprnds1, slp_node);
4471 else
4473 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4474 vec_oprnds0.quick_push (vec_oprnd0);
4475 if (op_type == binary_op)
4477 if (code == WIDEN_LSHIFT_EXPR)
4478 vec_oprnd1 = op1;
4479 else
4480 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4481 vec_oprnds1.quick_push (vec_oprnd1);
4485 else
4487 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4488 vec_oprnds0.truncate (0);
4489 vec_oprnds0.quick_push (vec_oprnd0);
4490 if (op_type == binary_op)
4492 if (code == WIDEN_LSHIFT_EXPR)
4493 vec_oprnd1 = op1;
4494 else
4495 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4496 vec_oprnd1);
4497 vec_oprnds1.truncate (0);
4498 vec_oprnds1.quick_push (vec_oprnd1);
4502 /* Arguments are ready. Create the new vector stmts. */
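	  /* Walk the promotion chain one step at a time: each call to
	     vect_create_vectorized_promotion_stmts doubles the number of
	     vector defs in VEC_OPRNDS0.  On the final step (i == 0) the
	     recorded CODECVT1/CODECVT2 pair is used instead of CODE1/CODE2
	     when the conversion needed a separate final stage (e.g. the
	     int-to-float half of a split FLOAT_EXPR).  */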
4503 for (i = multi_step_cvt; i >= 0; i--)
4505 tree this_dest = vec_dsts[i];
4506 enum tree_code c1 = code1, c2 = code2;
4507 if (i == 0 && codecvt2 != ERROR_MARK)
4509 c1 = codecvt1;
4510 c2 = codecvt2;
4512 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4513 &vec_oprnds1,
4514 stmt, this_dest, gsi,
4515 c1, c2, decl1, decl2,
4516 op_type);
4519 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4521 if (cvt_type)
4523 if (codecvt1 == CALL_EXPR)
4525 new_stmt = gimple_build_call (decl1, 1, vop0);
4526 new_temp = make_ssa_name (vec_dest, new_stmt);
4527 gimple_call_set_lhs (new_stmt, new_temp);
4529 else
4531 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4532 new_temp = make_ssa_name (vec_dest);
4533 new_stmt = gimple_build_assign (new_temp, codecvt1,
4534 vop0);
4537 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4539 else
4540 new_stmt = SSA_NAME_DEF_STMT (vop0);
4542 if (slp_node)
4543 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4544 else
4546 if (!prev_stmt_info)
4547 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4548 else
4549 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4550 prev_stmt_info = vinfo_for_stmt (new_stmt);
4555 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4556 break;
4558 case NARROW:
4559 /* In case the vectorization factor (VF) is bigger than the number
4560 of elements that we can fit in a vectype (nunits), we have to
4561 generate more than one vector stmt; that is, we need to "unroll"
4562 the vector stmt by a factor VF/nunits. */
4563 for (j = 0; j < ncopies; j++)
4565 /* Handle uses. */
4566 if (slp_node)
4567 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4568 slp_node);
4569 else
4571 vec_oprnds0.truncate (0);
4572 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4573 vect_pow2 (multi_step_cvt) - 1);
4576 /* Arguments are ready. Create the new vector stmts. */
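	  /* For a split FIX_TRUNC conversion, first apply CODECVT1 (float to
	     same-width integer) to every operand vector, then let
	     vect_create_vectorized_demotion_stmts pack the results down to
	     VECTYPE_OUT.  */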
4577 if (cvt_type)
4578 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4580 if (codecvt1 == CALL_EXPR)
4582 new_stmt = gimple_build_call (decl1, 1, vop0);
4583 new_temp = make_ssa_name (vec_dest, new_stmt);
4584 gimple_call_set_lhs (new_stmt, new_temp);
4586 else
4588 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4589 new_temp = make_ssa_name (vec_dest);
4590 new_stmt = gimple_build_assign (new_temp, codecvt1,
4591 vop0);
4594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4595 vec_oprnds0[i] = new_temp;
4598 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4599 stmt, vec_dsts, gsi,
4600 slp_node, code1,
4601 &prev_stmt_info);
4604 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4605 break;
4608 vec_oprnds0.release ();
4609 vec_oprnds1.release ();
4610 interm_types.release ();
4612 return true;
4616 /* Function vectorizable_assignment.
4618 Check if STMT performs an assignment (copy) that can be vectorized.
4619 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4620 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4621 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4623 static bool
4624 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4625 gimple **vec_stmt, slp_tree slp_node)
4627 tree vec_dest;
4628 tree scalar_dest;
4629 tree op;
4630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4631 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4632 tree new_temp;
4633 gimple *def_stmt;
4634 enum vect_def_type dt[1] = {vect_unknown_def_type};
4635 int ndts = 1;
4636 int ncopies;
4637 int i, j;
4638 vec<tree> vec_oprnds = vNULL;
4639 tree vop;
4640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4641 vec_info *vinfo = stmt_info->vinfo;
4642 gimple *new_stmt = NULL;
4643 stmt_vec_info prev_stmt_info = NULL;
4644 enum tree_code code;
4645 tree vectype_in;
4647 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4648 return false;
4650 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4651 && ! vec_stmt)
4652 return false;
4654 /* Is vectorizable assignment? */
4655 if (!is_gimple_assign (stmt))
4656 return false;
4658 scalar_dest = gimple_assign_lhs (stmt);
4659 if (TREE_CODE (scalar_dest) != SSA_NAME)
4660 return false;
4662 code = gimple_assign_rhs_code (stmt);
4663 if (gimple_assign_single_p (stmt)
4664 || code == PAREN_EXPR
4665 || CONVERT_EXPR_CODE_P (code))
4666 op = gimple_assign_rhs1 (stmt);
4667 else
4668 return false;
4670 if (code == VIEW_CONVERT_EXPR)
4671 op = TREE_OPERAND (op, 0);
4673 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4674 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4676 /* Multiple types in SLP are handled by creating the appropriate number of
4677 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4678 case of SLP. */
4679 if (slp_node)
4680 ncopies = 1;
4681 else
4682 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4684 gcc_assert (ncopies >= 1);
4686 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "use not simple.\n");
4691 return false;
4694 /* We can handle NOP_EXPR conversions that do not change the number
4695 of elements or the vector size. */
4696 if ((CONVERT_EXPR_CODE_P (code)
4697 || code == VIEW_CONVERT_EXPR)
4698 && (!vectype_in
4699 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
4700 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4701 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4702 return false;
4704 /* We do not handle bit-precision changes. */
4705 if ((CONVERT_EXPR_CODE_P (code)
4706 || code == VIEW_CONVERT_EXPR)
4707 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4708 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4709 || !type_has_mode_precision_p (TREE_TYPE (op)))
4710 /* But a conversion that does not change the bit-pattern is ok. */
4711 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4712 > TYPE_PRECISION (TREE_TYPE (op)))
4713 && TYPE_UNSIGNED (TREE_TYPE (op)))
4714 /* Conversion between boolean types of different sizes is
4715 a simple assignment in case their vectypes are the same
4716 boolean vectors. */
4717 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4718 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4722 "type conversion to/from bit-precision "
4723 "unsupported.\n");
4724 return false;
4727 if (!vec_stmt) /* transformation not required. */
4729 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4730 if (dump_enabled_p ())
4731 dump_printf_loc (MSG_NOTE, vect_location,
4732 "=== vectorizable_assignment ===\n");
4733 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4734 return true;
4737 /* Transform. */
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4741 /* Handle def. */
4742 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4744 /* Handle use. */
4745 for (j = 0; j < ncopies; j++)
4747 /* Handle uses. */
4748 if (j == 0)
4749 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4750 else
4751 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4753 /* Arguments are ready. Create the new vector stmt. */
4754 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4756 if (CONVERT_EXPR_CODE_P (code)
4757 || code == VIEW_CONVERT_EXPR)
4758 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4759 new_stmt = gimple_build_assign (vec_dest, vop);
4760 new_temp = make_ssa_name (vec_dest, new_stmt);
4761 gimple_assign_set_lhs (new_stmt, new_temp);
4762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4763 if (slp_node)
4764 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4767 if (slp_node)
4768 continue;
4770 if (j == 0)
4771 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4772 else
4773 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4775 prev_stmt_info = vinfo_for_stmt (new_stmt);
4778 vec_oprnds.release ();
4779 return true;
4783 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4784 either as shift by a scalar or by a vector. */
4786 bool
4787 vect_supportable_shift (enum tree_code code, tree scalar_type)
4790 machine_mode vec_mode;
4791 optab optab;
4792 int icode;
4793 tree vectype;
4795 vectype = get_vectype_for_scalar_type (scalar_type);
4796 if (!vectype)
4797 return false;
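  /* Prefer a shift by a scalar amount; if the target has no vector/scalar
     pattern for this code, fall back to the vector/vector shift optab.  */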
4799 optab = optab_for_tree_code (code, vectype, optab_scalar);
4800 if (!optab
4801 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4803 optab = optab_for_tree_code (code, vectype, optab_vector);
4804 if (!optab
4805 || (optab_handler (optab, TYPE_MODE (vectype))
4806 == CODE_FOR_nothing))
4807 return false;
4810 vec_mode = TYPE_MODE (vectype);
4811 icode = (int) optab_handler (optab, vec_mode);
4812 if (icode == CODE_FOR_nothing)
4813 return false;
4815 return true;
4819 /* Function vectorizable_shift.
4821 Check if STMT performs a shift operation that can be vectorized.
4822 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4823 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4824 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4826 static bool
4827 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4828 gimple **vec_stmt, slp_tree slp_node)
4830 tree vec_dest;
4831 tree scalar_dest;
4832 tree op0, op1 = NULL;
4833 tree vec_oprnd1 = NULL_TREE;
4834 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4835 tree vectype;
4836 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4837 enum tree_code code;
4838 machine_mode vec_mode;
4839 tree new_temp;
4840 optab optab;
4841 int icode;
4842 machine_mode optab_op2_mode;
4843 gimple *def_stmt;
4844 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4845 int ndts = 2;
4846 gimple *new_stmt = NULL;
4847 stmt_vec_info prev_stmt_info;
4848 poly_uint64 nunits_in;
4849 poly_uint64 nunits_out;
4850 tree vectype_out;
4851 tree op1_vectype;
4852 int ncopies;
4853 int j, i;
4854 vec<tree> vec_oprnds0 = vNULL;
4855 vec<tree> vec_oprnds1 = vNULL;
4856 tree vop0, vop1;
4857 unsigned int k;
4858 bool scalar_shift_arg = true;
4859 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4860 vec_info *vinfo = stmt_info->vinfo;
4862 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4863 return false;
4865 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4866 && ! vec_stmt)
4867 return false;
4869 /* Is STMT a vectorizable binary/unary operation? */
4870 if (!is_gimple_assign (stmt))
4871 return false;
4873 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4874 return false;
4876 code = gimple_assign_rhs_code (stmt);
4878 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4879 || code == RROTATE_EXPR))
4880 return false;
4882 scalar_dest = gimple_assign_lhs (stmt);
4883 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4884 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4886 if (dump_enabled_p ())
4887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4888 "bit-precision shifts not supported.\n");
4889 return false;
4892 op0 = gimple_assign_rhs1 (stmt);
4893 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4895 if (dump_enabled_p ())
4896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4897 "use not simple.\n");
4898 return false;
4900 /* If op0 is an external or constant def, use a vector type with
4901 the same size as the output vector type. */
4902 if (!vectype)
4903 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4904 if (vec_stmt)
4905 gcc_assert (vectype);
4906 if (!vectype)
4908 if (dump_enabled_p ())
4909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4910 "no vectype for scalar type\n");
4911 return false;
4914 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4915 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4916 if (maybe_ne (nunits_out, nunits_in))
4917 return false;
4919 op1 = gimple_assign_rhs2 (stmt);
4920 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4922 if (dump_enabled_p ())
4923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4924 "use not simple.\n");
4925 return false;
4928 /* Multiple types in SLP are handled by creating the appropriate number of
4929 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4930 case of SLP. */
4931 if (slp_node)
4932 ncopies = 1;
4933 else
4934 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4936 gcc_assert (ncopies >= 1);
4938 /* Determine whether the shift amount is a vector, or scalar. If the
4939 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4941 if ((dt[1] == vect_internal_def
4942 || dt[1] == vect_induction_def)
4943 && !slp_node)
4944 scalar_shift_arg = false;
4945 else if (dt[1] == vect_constant_def
4946 || dt[1] == vect_external_def
4947 || dt[1] == vect_internal_def)
4949 /* In SLP, we need to check whether the shift count is the same
4950 for all statements; in loops, if it is a constant or invariant,
4951 it is always a scalar shift. */
4952 if (slp_node)
4954 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4955 gimple *slpstmt;
4957 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4958 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4959 scalar_shift_arg = false;
4962 /* If the shift amount is computed by a pattern stmt we cannot
4963 use the scalar amount directly thus give up and use a vector
4964 shift. */
4965 if (dt[1] == vect_internal_def)
4967 gimple *def = SSA_NAME_DEF_STMT (op1);
4968 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4969 scalar_shift_arg = false;
4972 else
4974 if (dump_enabled_p ())
4975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4976 "operand mode requires invariant argument.\n");
4977 return false;
4980 /* Vector shifted by vector. */
4981 if (!scalar_shift_arg)
4983 optab = optab_for_tree_code (code, vectype, optab_vector);
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_NOTE, vect_location,
4986 "vector/vector shift/rotate found.\n");
4988 if (!op1_vectype)
4989 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4990 if (op1_vectype == NULL_TREE
4991 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4993 if (dump_enabled_p ())
4994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4995 "unusable type for last operand in"
4996 " vector/vector shift/rotate.\n");
4997 return false;
5000 /* See if the machine has a vector shifted by scalar insn and if not
5001 then see if it has a vector shifted by vector insn. */
5002 else
5004 optab = optab_for_tree_code (code, vectype, optab_scalar);
5005 if (optab
5006 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE, vect_location,
5010 "vector/scalar shift/rotate found.\n");
5012 else
5014 optab = optab_for_tree_code (code, vectype, optab_vector);
5015 if (optab
5016 && (optab_handler (optab, TYPE_MODE (vectype))
5017 != CODE_FOR_nothing))
5019 scalar_shift_arg = false;
5021 if (dump_enabled_p ())
5022 dump_printf_loc (MSG_NOTE, vect_location,
5023 "vector/vector shift/rotate found.\n");
5025 /* Unlike the other binary operators, shifts/rotates have
5026 the rhs being int rather than the same type as the lhs,
5027 so make sure the scalar is of the right type if we are
5028 dealing with vectors of long long/long/short/char. */
5029 if (dt[1] == vect_constant_def)
5030 op1 = fold_convert (TREE_TYPE (vectype), op1);
5031 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5032 TREE_TYPE (op1)))
5034 if (slp_node
5035 && TYPE_MODE (TREE_TYPE (vectype))
5036 != TYPE_MODE (TREE_TYPE (op1)))
5038 if (dump_enabled_p ())
5039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5040 "unusable type for last operand in"
5041 " vector/vector shift/rotate.\n");
5042 return false;
5044 if (vec_stmt && !slp_node)
5046 op1 = fold_convert (TREE_TYPE (vectype), op1);
5047 op1 = vect_init_vector (stmt, op1,
5048 TREE_TYPE (vectype), NULL);
5055 /* Supportable by target? */
5056 if (!optab)
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5060 "no optab.\n");
5061 return false;
5063 vec_mode = TYPE_MODE (vectype);
5064 icode = (int) optab_handler (optab, vec_mode);
5065 if (icode == CODE_FOR_nothing)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "op not supported by target.\n");
5070 /* Check only during analysis. */
5071 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5072 || (!vec_stmt
5073 && !vect_worthwhile_without_simd_p (vinfo, code)))
5074 return false;
5075 if (dump_enabled_p ())
5076 dump_printf_loc (MSG_NOTE, vect_location,
5077 "proceeding using word mode.\n");
5080 /* Worthwhile without SIMD support? Check only during analysis. */
5081 if (!vec_stmt
5082 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5083 && !vect_worthwhile_without_simd_p (vinfo, code))
5085 if (dump_enabled_p ())
5086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5087 "not worthwhile without SIMD support.\n");
5088 return false;
5091 if (!vec_stmt) /* transformation not required. */
5093 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5094 if (dump_enabled_p ())
5095 dump_printf_loc (MSG_NOTE, vect_location,
5096 "=== vectorizable_shift ===\n");
5097 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5098 return true;
5101 /* Transform. */
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_NOTE, vect_location,
5105 "transform binary/unary operation.\n");
5107 /* Handle def. */
5108 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5110 prev_stmt_info = NULL;
5111 for (j = 0; j < ncopies; j++)
5113 /* Handle uses. */
5114 if (j == 0)
5116 if (scalar_shift_arg)
5118 /* Vector shl and shr insn patterns can be defined with scalar
5119 operand 2 (shift operand). In this case, use constant or loop
5120 invariant op1 directly, without extending it to vector mode
5121 first. */
5122 optab_op2_mode = insn_data[icode].operand[2].mode;
5123 if (!VECTOR_MODE_P (optab_op2_mode))
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_NOTE, vect_location,
5127 "operand 1 using scalar mode.\n");
5128 vec_oprnd1 = op1;
5129 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5130 vec_oprnds1.quick_push (vec_oprnd1);
5131 if (slp_node)
5133 /* Store vec_oprnd1 for every vector stmt to be created
5134 for SLP_NODE. We check during the analysis that all
5135 the shift arguments are the same.
5136 TODO: Allow different constants for different vector
5137 stmts generated for an SLP instance. */
5138 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5139 vec_oprnds1.quick_push (vec_oprnd1);
5144 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5145 (a special case for certain kinds of vector shifts); otherwise,
5146 operand 1 should be of a vector type (the usual case). */
5147 if (vec_oprnd1)
5148 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5149 slp_node);
5150 else
5151 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5152 slp_node);
5154 else
5155 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5157 /* Arguments are ready. Create the new vector stmt. */
5158 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5160 vop1 = vec_oprnds1[i];
5161 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5162 new_temp = make_ssa_name (vec_dest, new_stmt);
5163 gimple_assign_set_lhs (new_stmt, new_temp);
5164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5165 if (slp_node)
5166 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5169 if (slp_node)
5170 continue;
5172 if (j == 0)
5173 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5174 else
5175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5176 prev_stmt_info = vinfo_for_stmt (new_stmt);
5179 vec_oprnds0.release ();
5180 vec_oprnds1.release ();
5182 return true;
5186 /* Function vectorizable_operation.
5188 Check if STMT performs a binary, unary or ternary operation that can
5189 be vectorized.
5190 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5191 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5192 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5194 static bool
5195 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5196 gimple **vec_stmt, slp_tree slp_node)
5198 tree vec_dest;
5199 tree scalar_dest;
5200 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5202 tree vectype;
5203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5204 enum tree_code code, orig_code;
5205 machine_mode vec_mode;
5206 tree new_temp;
5207 int op_type;
5208 optab optab;
5209 bool target_support_p;
5210 gimple *def_stmt;
5211 enum vect_def_type dt[3]
5212 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5213 int ndts = 3;
5214 gimple *new_stmt = NULL;
5215 stmt_vec_info prev_stmt_info;
5216 poly_uint64 nunits_in;
5217 poly_uint64 nunits_out;
5218 tree vectype_out;
5219 int ncopies;
5220 int j, i;
5221 vec<tree> vec_oprnds0 = vNULL;
5222 vec<tree> vec_oprnds1 = vNULL;
5223 vec<tree> vec_oprnds2 = vNULL;
5224 tree vop0, vop1, vop2;
5225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5226 vec_info *vinfo = stmt_info->vinfo;
5228 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5229 return false;
5231 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5232 && ! vec_stmt)
5233 return false;
5235 /* Is STMT a vectorizable binary/unary operation? */
5236 if (!is_gimple_assign (stmt))
5237 return false;
5239 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5240 return false;
5242 orig_code = code = gimple_assign_rhs_code (stmt);
5244 /* For pointer addition and subtraction, we should use the normal
5245 plus and minus for the vector operation. */
5246 if (code == POINTER_PLUS_EXPR)
5247 code = PLUS_EXPR;
5248 if (code == POINTER_DIFF_EXPR)
5249 code = MINUS_EXPR;
5251 /* Support only unary or binary operations. */
5252 op_type = TREE_CODE_LENGTH (code);
5253 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5257 "num. args = %d (not unary/binary/ternary op).\n",
5258 op_type);
5259 return false;
5262 scalar_dest = gimple_assign_lhs (stmt);
5263 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5265 /* Most operations cannot handle bit-precision types without extra
5266 truncations. */
5267 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5268 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5269 /* Exception are bitwise binary operations. */
5270 && code != BIT_IOR_EXPR
5271 && code != BIT_XOR_EXPR
5272 && code != BIT_AND_EXPR)
5274 if (dump_enabled_p ())
5275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5276 "bit-precision arithmetic not supported.\n");
5277 return false;
5280 op0 = gimple_assign_rhs1 (stmt);
5281 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5285 "use not simple.\n");
5286 return false;
5288 /* If op0 is an external or constant def, use a vector type with
5289 the same size as the output vector type. */
5290 if (!vectype)
5292 /* For a boolean type we cannot determine the vectype from an
5293 invariant value (we don't know whether it is a vector
5294 of booleans or a vector of integers). Use the output
5295 vectype, because operations on booleans don't change the
5296 type. */
5297 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5299 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "not supported operation on bool value.\n");
5304 return false;
5306 vectype = vectype_out;
5308 else
5309 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5311 if (vec_stmt)
5312 gcc_assert (vectype);
5313 if (!vectype)
5315 if (dump_enabled_p ())
5317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5318 "no vectype for scalar type ");
5319 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5320 TREE_TYPE (op0));
5321 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5324 return false;
5327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5328 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5329 if (maybe_ne (nunits_out, nunits_in))
5330 return false;
5332 if (op_type == binary_op || op_type == ternary_op)
5334 op1 = gimple_assign_rhs2 (stmt);
5335 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5339 "use not simple.\n");
5340 return false;
5343 if (op_type == ternary_op)
5345 op2 = gimple_assign_rhs3 (stmt);
5346 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5348 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5350 "use not simple.\n");
5351 return false;
5355 /* Multiple types in SLP are handled by creating the appropriate number of
5356 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5357 case of SLP. */
5358 if (slp_node)
5359 ncopies = 1;
5360 else
5361 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5363 gcc_assert (ncopies >= 1);
5365 /* Shifts are handled in vectorizable_shift (). */
5366 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5367 || code == RROTATE_EXPR)
5368 return false;
5370 /* Supportable by target? */
5372 vec_mode = TYPE_MODE (vectype);
5373 if (code == MULT_HIGHPART_EXPR)
5374 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5375 else
5377 optab = optab_for_tree_code (code, vectype, optab_default);
5378 if (!optab)
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "no optab.\n");
5383 return false;
5385 target_support_p = (optab_handler (optab, vec_mode)
5386 != CODE_FOR_nothing);
5389 if (!target_support_p)
5391 if (dump_enabled_p ())
5392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5393 "op not supported by target.\n");
5394 /* Check only during analysis. */
5395 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5396 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5397 return false;
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_NOTE, vect_location,
5400 "proceeding using word mode.\n");
5403 /* Worthwhile without SIMD support? Check only during analysis. */
5404 if (!VECTOR_MODE_P (vec_mode)
5405 && !vec_stmt
5406 && !vect_worthwhile_without_simd_p (vinfo, code))
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5410 "not worthwhile without SIMD support.\n");
5411 return false;
5414 if (!vec_stmt) /* transformation not required. */
5416 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5417 if (dump_enabled_p ())
5418 dump_printf_loc (MSG_NOTE, vect_location,
5419 "=== vectorizable_operation ===\n");
5420 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5421 return true;
5424 /* Transform. */
5426 if (dump_enabled_p ())
5427 dump_printf_loc (MSG_NOTE, vect_location,
5428 "transform binary/unary operation.\n");
5430 /* Handle def. */
5431 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5433 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5434 vectors with unsigned elements, but the result is signed. So, we
5435 need to compute the MINUS_EXPR into a vectype temporary and
5436 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5437 tree vec_cvt_dest = NULL_TREE;
5438 if (orig_code == POINTER_DIFF_EXPR)
5439 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5441 /* In case the vectorization factor (VF) is bigger than the number
5442 of elements that we can fit in a vectype (nunits), we have to generate
5443 more than one vector stmt; that is, we need to "unroll" the
5444 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5445 from one copy of the vector stmt to the next, in the field
5446 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5447 stages to find the correct vector defs to be used when vectorizing
5448 stmts that use the defs of the current stmt. The example below
5449 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5450 we need to create 4 vectorized stmts):
5452 before vectorization:
5453 RELATED_STMT VEC_STMT
5454 S1: x = memref - -
5455 S2: z = x + 1 - -
5457 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5458 there):
5459 RELATED_STMT VEC_STMT
5460 VS1_0: vx0 = memref0 VS1_1 -
5461 VS1_1: vx1 = memref1 VS1_2 -
5462 VS1_2: vx2 = memref2 VS1_3 -
5463 VS1_3: vx3 = memref3 - -
5464 S1: x = load - VS1_0
5465 S2: z = x + 1 - -
5467 step2: vectorize stmt S2 (done here):
5468 To vectorize stmt S2 we first need to find the relevant vector
5469 def for the first operand 'x'. This is, as usual, obtained from
5470 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5471 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5472 relevant vector def 'vx0'. Having found 'vx0' we can generate
5473 the vector stmt VS2_0, and as usual, record it in the
5474 STMT_VINFO_VEC_STMT of stmt S2.
5475 When creating the second copy (VS2_1), we obtain the relevant vector
5476 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5477 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5478 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5479 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5480 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5481 chain of stmts and pointers:
5482 RELATED_STMT VEC_STMT
5483 VS1_0: vx0 = memref0 VS1_1 -
5484 VS1_1: vx1 = memref1 VS1_2 -
5485 VS1_2: vx2 = memref2 VS1_3 -
5486 VS1_3: vx3 = memref3 - -
5487 S1: x = load - VS1_0
5488 VS2_0: vz0 = vx0 + v1 VS2_1 -
5489 VS2_1: vz1 = vx1 + v1 VS2_2 -
5490 VS2_2: vz2 = vx2 + v1 VS2_3 -
5491 VS2_3: vz3 = vx3 + v1 - -
5492 S2: z = x + 1 - VS2_0 */
5494 prev_stmt_info = NULL;
5495 for (j = 0; j < ncopies; j++)
5497 /* Handle uses. */
5498 if (j == 0)
5500 if (op_type == binary_op || op_type == ternary_op)
5501 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5502 slp_node);
5503 else
5504 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5505 slp_node);
5506 if (op_type == ternary_op)
5507 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5508 slp_node);
5510 else
5512 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
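	  /* vect_get_vec_defs_for_stmt_copy only advances the first two
	     operands; step the third operand's def chain by hand.  */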
5513 if (op_type == ternary_op)
5515 tree vec_oprnd = vec_oprnds2.pop ();
5516 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5517 vec_oprnd));
5521 /* Arguments are ready. Create the new vector stmt. */
5522 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5524 vop1 = ((op_type == binary_op || op_type == ternary_op)
5525 ? vec_oprnds1[i] : NULL_TREE);
5526 vop2 = ((op_type == ternary_op)
5527 ? vec_oprnds2[i] : NULL_TREE);
5528 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5529 new_temp = make_ssa_name (vec_dest, new_stmt);
5530 gimple_assign_set_lhs (new_stmt, new_temp);
5531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5532 if (vec_cvt_dest)
5534 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5535 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5536 new_temp);
5537 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5538 gimple_assign_set_lhs (new_stmt, new_temp);
5539 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5541 if (slp_node)
5542 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5545 if (slp_node)
5546 continue;
5548 if (j == 0)
5549 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5550 else
5551 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5552 prev_stmt_info = vinfo_for_stmt (new_stmt);
5555 vec_oprnds0.release ();
5556 vec_oprnds1.release ();
5557 vec_oprnds2.release ();
5559 return true;
5562 /* A helper function to ensure data reference DR's base alignment. */
5564 static void
5565 ensure_base_align (struct data_reference *dr)
5567 if (!dr->aux)
5568 return;
5570 if (DR_VECT_AUX (dr)->base_misaligned)
5572 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5574 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
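/* DR_TARGET_ALIGNMENT is in bytes, while DECL_ALIGN and
   symtab_node::increase_alignment work in bits, hence the scaling
   above.  */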
5576 if (decl_in_symtab_p (base_decl))
5577 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5578 else
5580 SET_DECL_ALIGN (base_decl, align_base_to);
5581 DECL_USER_ALIGN (base_decl) = 1;
5583 DR_VECT_AUX (dr)->base_misaligned = false;
5588 /* Function get_group_alias_ptr_type.
5590 Return the alias type for the group starting at FIRST_STMT. */
5592 static tree
5593 get_group_alias_ptr_type (gimple *first_stmt)
5595 struct data_reference *first_dr, *next_dr;
5596 gimple *next_stmt;
5598 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5599 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5600 while (next_stmt)
5602 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5603 if (get_alias_set (DR_REF (first_dr))
5604 != get_alias_set (DR_REF (next_dr)))
5606 if (dump_enabled_p ())
5607 dump_printf_loc (MSG_NOTE, vect_location,
5608 "conflicting alias set types.\n");
5609 return ptr_type_node;
5611 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5613 return reference_alias_ptr_type (DR_REF (first_dr));
5617 /* Function vectorizable_store.
5619 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5620 can be vectorized.
5621 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5622 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5623 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5625 static bool
5626 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5627 slp_tree slp_node)
5629 tree data_ref;
5630 tree op;
5631 tree vec_oprnd = NULL_TREE;
5632 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5633 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5634 tree elem_type;
5635 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5636 struct loop *loop = NULL;
5637 machine_mode vec_mode;
5638 tree dummy;
5639 enum dr_alignment_support alignment_support_scheme;
5640 gimple *def_stmt;
5641 enum vect_def_type dt;
5642 stmt_vec_info prev_stmt_info = NULL;
5643 tree dataref_ptr = NULL_TREE;
5644 tree dataref_offset = NULL_TREE;
5645 gimple *ptr_incr = NULL;
5646 int ncopies;
5647 int j;
5648 gimple *next_stmt, *first_stmt;
5649 bool grouped_store;
5650 unsigned int group_size, i;
5651 vec<tree> oprnds = vNULL;
5652 vec<tree> result_chain = vNULL;
5653 bool inv_p;
5654 tree offset = NULL_TREE;
5655 vec<tree> vec_oprnds = vNULL;
5656 bool slp = (slp_node != NULL);
5657 unsigned int vec_num;
5658 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5659 vec_info *vinfo = stmt_info->vinfo;
5660 tree aggr_type;
5661 gather_scatter_info gs_info;
5662 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5663 gimple *new_stmt;
5664 poly_uint64 vf;
5665 vec_load_store_type vls_type;
5666 tree ref_type;
5668 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5669 return false;
5671 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5672 && ! vec_stmt)
5673 return false;
5675 /* Is vectorizable store? */
5677 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5678 if (is_gimple_assign (stmt))
5680 tree scalar_dest = gimple_assign_lhs (stmt);
5681 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5682 && is_pattern_stmt_p (stmt_info))
5683 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5684 if (TREE_CODE (scalar_dest) != ARRAY_REF
5685 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5686 && TREE_CODE (scalar_dest) != INDIRECT_REF
5687 && TREE_CODE (scalar_dest) != COMPONENT_REF
5688 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5689 && TREE_CODE (scalar_dest) != REALPART_EXPR
5690 && TREE_CODE (scalar_dest) != MEM_REF)
5691 return false;
5693 else
5695 gcall *call = dyn_cast <gcall *> (stmt);
5696 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5697 return false;
5699 if (slp_node != NULL)
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703 "SLP of masked stores not supported.\n");
5704 return false;
5707 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5708 mask = gimple_call_arg (call, 2);
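/* For an IFN_MASK_STORE call the arguments are laid out as
   (base pointer, alias/alignment cookie, mask, stored value); the
   cookie's type provides REF_TYPE above and the stored value is
   retrieved via vect_get_store_rhs below.  */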
5709 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5710 return false;
5713 op = vect_get_store_rhs (stmt);
5715 /* Cannot have hybrid store SLP -- that would mean storing to the
5716 same location twice. */
5717 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5719 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5720 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5722 if (loop_vinfo)
5724 loop = LOOP_VINFO_LOOP (loop_vinfo);
5725 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5727 else
5728 vf = 1;
5730 /* Multiple types in SLP are handled by creating the appropriate number of
5731 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5732 case of SLP. */
5733 if (slp)
5734 ncopies = 1;
5735 else
5736 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5738 gcc_assert (ncopies >= 1);
5740 /* FORNOW. This restriction should be relaxed. */
5741 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5743 if (dump_enabled_p ())
5744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5745 "multiple types in nested loop.\n");
5746 return false;
5749 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
5750 return false;
5752 elem_type = TREE_TYPE (vectype);
5753 vec_mode = TYPE_MODE (vectype);
5755 if (!STMT_VINFO_DATA_REF (stmt_info))
5756 return false;
5758 vect_memory_access_type memory_access_type;
5759 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5760 &memory_access_type, &gs_info))
5761 return false;
5763 if (mask)
5765 if (memory_access_type != VMAT_CONTIGUOUS)
5767 if (dump_enabled_p ())
5768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5769 "unsupported access type for masked store.\n");
5770 return false;
5772 if (!VECTOR_MODE_P (vec_mode)
5773 || !can_vec_mask_load_store_p (vec_mode, TYPE_MODE (mask_vectype),
5774 false))
5775 return false;
5777 else
5779 /* FORNOW. In some cases can vectorize even if data-type not supported
5780 (e.g. array initialization with 0). */
5781 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5782 return false;
5785 if (!vec_stmt) /* transformation not required. */
5787 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5788 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5789 /* The SLP costs are calculated during SLP analysis. */
5790 if (!PURE_SLP_STMT (stmt_info))
5791 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
5792 vls_type, NULL, NULL, NULL);
5793 return true;
5795 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5797 /* Transform. */
5799 ensure_base_align (dr);
5801 if (memory_access_type == VMAT_GATHER_SCATTER)
5803 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
5804 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5805 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5806 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5807 edge pe = loop_preheader_edge (loop);
5808 gimple_seq seq;
5809 basic_block new_bb;
5810 enum { NARROW, NONE, WIDEN } modifier;
5811 poly_uint64 scatter_off_nunits
5812 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5814 if (known_eq (nunits, scatter_off_nunits))
5815 modifier = NONE;
5816 else if (known_eq (nunits * 2, scatter_off_nunits))
5818 modifier = WIDEN;
5820 /* Currently gathers and scatters are only supported for
5821 fixed-length vectors. */
5822 unsigned int count = scatter_off_nunits.to_constant ();
5823 vec_perm_builder sel (count, count, 1);
5824 for (i = 0; i < (unsigned int) count; ++i)
5825 sel.quick_push (i | (count / 2));
5827 vec_perm_indices indices (sel, 1, count);
5828 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5829 indices);
5830 gcc_assert (perm_mask != NULL_TREE);
5832 else if (known_eq (nunits, scatter_off_nunits * 2))
5834 modifier = NARROW;
5836 /* Currently gathers and scatters are only supported for
5837 fixed-length vectors. */
5838 unsigned int count = nunits.to_constant ();
5839 vec_perm_builder sel (count, count, 1);
5840 for (i = 0; i < (unsigned int) count; ++i)
5841 sel.quick_push (i | (count / 2));
5843 vec_perm_indices indices (sel, 2, count);
5844 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5845 gcc_assert (perm_mask != NULL_TREE);
5846 ncopies *= 2;
5848 else
5849 gcc_unreachable ();
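/* For example, with a fixed element count of 8 the mask built above is
   { 4, 5, 6, 7, 4, 5, 6, 7 }: on every odd-numbered copy it exposes the
   high half of the wider vector (the offsets for WIDEN, the source data
   for NARROW), so that each copy hands half of the elements to the
   scatter builtin.  */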
5851 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5852 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5853 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5854 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5855 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5856 scaletype = TREE_VALUE (arglist);
5858 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5859 && TREE_CODE (rettype) == VOID_TYPE);
5861 ptr = fold_convert (ptrtype, gs_info.base);
5862 if (!is_gimple_min_invariant (ptr))
5864 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5865 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5866 gcc_assert (!new_bb);
5869 /* Currently we support only unconditional scatter stores,
5870 so mask should be all ones. */
5871 mask = build_int_cst (masktype, -1);
5872 mask = vect_init_vector (stmt, mask, masktype, NULL);
5874 scale = build_int_cst (scaletype, gs_info.scale);
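/* Each copy below emits a call of the form
     GS_INFO.DECL (ptr, mask, offsets, src, scale)
   matching the (ptr, mask, index, src, scale) prototype decoded from
   ARGLIST above; SRC and the offsets are view-converted first if their
   vector types do not match SRCTYPE and IDXTYPE.  */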
5876 prev_stmt_info = NULL;
5877 for (j = 0; j < ncopies; ++j)
5879 if (j == 0)
5881 src = vec_oprnd1
5882 = vect_get_vec_def_for_operand (op, stmt);
5883 op = vec_oprnd0
5884 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5886 else if (modifier != NONE && (j & 1))
5888 if (modifier == WIDEN)
5890 src = vec_oprnd1
5891 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5892 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5893 stmt, gsi);
5895 else if (modifier == NARROW)
5897 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5898 stmt, gsi);
5899 op = vec_oprnd0
5900 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5901 vec_oprnd0);
5903 else
5904 gcc_unreachable ();
5906 else
5908 src = vec_oprnd1
5909 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5910 op = vec_oprnd0
5911 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5912 vec_oprnd0);
5915 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5917 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
5918 TYPE_VECTOR_SUBPARTS (srctype)));
5919 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5920 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5921 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5923 src = var;
5926 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5928 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
5929 TYPE_VECTOR_SUBPARTS (idxtype)));
5930 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5931 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5932 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5933 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5934 op = var;
5937 new_stmt
5938 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5942 if (prev_stmt_info == NULL)
5943 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5944 else
5945 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5946 prev_stmt_info = vinfo_for_stmt (new_stmt);
5948 return true;
5951 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5952 if (grouped_store)
5954 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5955 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5956 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5958 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5960 /* FORNOW */
5961 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5963 /* We vectorize all the stmts of the interleaving group when we
5964 reach the last stmt in the group. */
5965 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5966 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5967 && !slp)
5969 *vec_stmt = NULL;
5970 return true;
5973 if (slp)
5975 grouped_store = false;
5976 /* VEC_NUM is the number of vect stmts to be created for this
5977 group. */
5978 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5979 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5980 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5981 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5982 op = vect_get_store_rhs (first_stmt);
5984 else
5985 /* VEC_NUM is the number of vect stmts to be created for this
5986 group. */
5987 vec_num = group_size;
5989 ref_type = get_group_alias_ptr_type (first_stmt);
5991 else
5993 first_stmt = stmt;
5994 first_dr = dr;
5995 group_size = vec_num = 1;
5996 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5999 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_NOTE, vect_location,
6001 "transform store. ncopies = %d\n", ncopies);
6003 if (memory_access_type == VMAT_ELEMENTWISE
6004 || memory_access_type == VMAT_STRIDED_SLP)
6006 gimple_stmt_iterator incr_gsi;
6007 bool insert_after;
6008 gimple *incr;
6009 tree offvar;
6010 tree ivstep;
6011 tree running_off;
6012 gimple_seq stmts = NULL;
6013 tree stride_base, stride_step, alias_off;
6014 tree vec_oprnd;
6015 unsigned int g;
6016 /* Checked by get_load_store_type. */
6017 unsigned int const_nunits = nunits.to_constant ();
6019 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6021 stride_base
6022 = fold_build_pointer_plus
6023 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6024 size_binop (PLUS_EXPR,
6025 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6026 convert_to_ptrofftype (DR_INIT (first_dr))));
6027 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6029 /* For a store with loop-invariant (but other than power-of-2)
6030 stride (i.e. not a grouped access) like so:
6032 for (i = 0; i < n; i += stride)
6033 array[i] = ...;
6035 we generate a new induction variable and new stores from
6036 the components of the (vectorized) rhs:
6038 for (j = 0; ; j += VF*stride)
6039 vectemp = ...;
6040 tmp1 = vectemp[0];
6041 array[j] = tmp1;
6042 tmp2 = vectemp[1];
6043 array[j + stride] = tmp2;  ...  */
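/* As a concrete illustration of the non-SLP path: for a V4SI store the
   loop below extracts the four lanes with BIT_FIELD_REFs and stores
   them at RUNNING_OFF, RUNNING_OFF + DR_STEP, ... RUNNING_OFF
   + 3 * DR_STEP, while the induction variable created below advances
   by VF * DR_STEP per vector iteration.  */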
6047 unsigned nstores = const_nunits;
6048 unsigned lnel = 1;
6049 tree ltype = elem_type;
6050 tree lvectype = vectype;
6051 if (slp)
6053 if (group_size < const_nunits
6054 && const_nunits % group_size == 0)
6056 nstores = const_nunits / group_size;
6057 lnel = group_size;
6058 ltype = build_vector_type (elem_type, group_size);
6059 lvectype = vectype;
6061 /* First check if vec_extract optab doesn't support extraction
6062 of vector elts directly. */
6063 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6064 machine_mode vmode;
6065 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6066 || !VECTOR_MODE_P (vmode)
6067 || (convert_optab_handler (vec_extract_optab,
6068 TYPE_MODE (vectype), vmode)
6069 == CODE_FOR_nothing))
6071 /* Try to avoid emitting an extract of vector elements
6072 by performing the extracts using an integer type of the
6073 same size, extracting from a vector of those and then
6074 re-interpreting it as the original vector type if
6075 supported. */
6076 unsigned lsize
6077 = group_size * GET_MODE_BITSIZE (elmode);
6078 elmode = int_mode_for_size (lsize, 0).require ();
6079 unsigned int lnunits = const_nunits / group_size;
6080 /* If we can't construct such a vector fall back to
6081 element extracts from the original vector type and
6082 element size stores. */
6083 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6084 && VECTOR_MODE_P (vmode)
6085 && (convert_optab_handler (vec_extract_optab,
6086 vmode, elmode)
6087 != CODE_FOR_nothing))
6089 nstores = lnunits;
6090 lnel = group_size;
6091 ltype = build_nonstandard_integer_type (lsize, 1);
6092 lvectype = build_vector_type (ltype, nstores);
6094 /* Else fall back to vector extraction anyway.
6095 Fewer stores are more important than avoiding spilling
6096 of the vector we extract from. Compared to the
6097 construction case in vectorizable_load no store-forwarding
6098 issue exists here for reasonable archs. */
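/* For the integer-mode path above, e.g. a group of two HImode elements
   in a V8HImode vector: if the target cannot extract a two-element
   subvector directly but can extract SImode lanes from V4SImode, the
   group is stored as four 32-bit pieces (LSIZE = 32, NSTORES = 4,
   LNEL = 2, LVECTYPE = V4SI).  */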
6101 else if (group_size >= const_nunits
6102 && group_size % const_nunits == 0)
6104 nstores = 1;
6105 lnel = const_nunits;
6106 ltype = vectype;
6107 lvectype = vectype;
6109 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6110 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6113 ivstep = stride_step;
6114 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6115 build_int_cst (TREE_TYPE (ivstep), vf));
6117 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6119 create_iv (stride_base, ivstep, NULL,
6120 loop, &incr_gsi, insert_after,
6121 &offvar, NULL);
6122 incr = gsi_stmt (incr_gsi);
6123 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6125 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6126 if (stmts)
6127 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6129 prev_stmt_info = NULL;
6130 alias_off = build_int_cst (ref_type, 0);
6131 next_stmt = first_stmt;
6132 for (g = 0; g < group_size; g++)
6134 running_off = offvar;
6135 if (g)
6137 tree size = TYPE_SIZE_UNIT (ltype);
6138 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6139 size);
6140 tree newoff = copy_ssa_name (running_off, NULL);
6141 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6142 running_off, pos);
6143 vect_finish_stmt_generation (stmt, incr, gsi);
6144 running_off = newoff;
6146 unsigned int group_el = 0;
6147 unsigned HOST_WIDE_INT
6148 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6149 for (j = 0; j < ncopies; j++)
6151 /* We've set op and dt above, from vect_get_store_rhs,
6152 and first_stmt == stmt. */
6153 if (j == 0)
6155 if (slp)
6157 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6158 slp_node);
6159 vec_oprnd = vec_oprnds[0];
6161 else
6163 op = vect_get_store_rhs (next_stmt);
6164 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6167 else
6169 if (slp)
6170 vec_oprnd = vec_oprnds[j];
6171 else
6173 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6174 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6177 /* Pun the vector to extract from if necessary. */
6178 if (lvectype != vectype)
6180 tree tem = make_ssa_name (lvectype);
6181 gimple *pun
6182 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6183 lvectype, vec_oprnd));
6184 vect_finish_stmt_generation (stmt, pun, gsi);
6185 vec_oprnd = tem;
6187 for (i = 0; i < nstores; i++)
6189 tree newref, newoff;
6190 gimple *incr, *assign;
6191 tree size = TYPE_SIZE (ltype);
6192 /* Extract the i'th component. */
6193 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6194 bitsize_int (i), size);
6195 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6196 size, pos);
6198 elem = force_gimple_operand_gsi (gsi, elem, true,
6199 NULL_TREE, true,
6200 GSI_SAME_STMT);
6202 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6203 group_el * elsz);
6204 newref = build2 (MEM_REF, ltype,
6205 running_off, this_off);
6207 /* And store it to *running_off. */
6208 assign = gimple_build_assign (newref, elem);
6209 vect_finish_stmt_generation (stmt, assign, gsi);
6211 group_el += lnel;
6212 if (! slp
6213 || group_el == group_size)
6215 newoff = copy_ssa_name (running_off, NULL);
6216 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6217 running_off, stride_step);
6218 vect_finish_stmt_generation (stmt, incr, gsi);
6220 running_off = newoff;
6221 group_el = 0;
6223 if (g == group_size - 1
6224 && !slp)
6226 if (j == 0 && i == 0)
6227 STMT_VINFO_VEC_STMT (stmt_info)
6228 = *vec_stmt = assign;
6229 else
6230 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6231 prev_stmt_info = vinfo_for_stmt (assign);
6235 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6236 if (slp)
6237 break;
6240 vec_oprnds.release ();
6241 return true;
6244 auto_vec<tree> dr_chain (group_size);
6245 oprnds.create (group_size);
6247 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6248 gcc_assert (alignment_support_scheme);
6249 /* Targets with store-lane instructions must not require explicit
6250 realignment. vect_supportable_dr_alignment always returns either
6251 dr_aligned or dr_unaligned_supported for masked operations. */
6252 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask)
6253 || alignment_support_scheme == dr_aligned
6254 || alignment_support_scheme == dr_unaligned_supported);
6256 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6257 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6258 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6260 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6261 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6262 else
6263 aggr_type = vectype;
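/* For store-lanes the single data reference has to cover the whole
   group, hence an array of VEC_NUM * NUNITS scalar elements; otherwise
   each access is one VECTYPE.  */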
6265 if (mask)
6266 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6268 /* In case the vectorization factor (VF) is bigger than the number
6269 of elements that we can fit in a vectype (nunits), we have to generate
6270 more than one vector stmt - i.e., we need to "unroll" the
6271 vector stmt by a factor of VF/nunits. For more details see the
6272 documentation in vect_get_vec_def_for_stmt_copy. */
6274 /* In case of interleaving (non-unit grouped access):
6276 S1: &base + 2 = x2
6277 S2: &base = x0
6278 S3: &base + 1 = x1
6279 S4: &base + 3 = x3
6281 We create vectorized stores starting from base address (the access of the
6282 first stmt in the chain (S2 in the above example), when the last store stmt
6283 of the chain (S4) is reached:
6285 VS1: &base = vx2
6286 VS2: &base + vec_size*1 = vx0
6287 VS3: &base + vec_size*2 = vx1
6288 VS4: &base + vec_size*3 = vx3
6290 Then permutation statements are generated:
6292 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6293 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6296 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6297 (the order of the data-refs in the output of vect_permute_store_chain
6298 corresponds to the order of scalar stmts in the interleaving chain - see
6299 the documentation of vect_permute_store_chain()).
6301 In case of both multiple types and interleaving, above vector stores and
6302 permutation stmts are created for every copy. The result vector stmts are
6303 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6304 STMT_VINFO_RELATED_STMT for the next copies.  */
6307 prev_stmt_info = NULL;
6308 tree vec_mask = NULL_TREE;
6309 for (j = 0; j < ncopies; j++)
6312 if (j == 0)
6314 if (slp)
6316 /* Get vectorized arguments for SLP_NODE. */
6317 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6318 NULL, slp_node);
6320 vec_oprnd = vec_oprnds[0];
6322 else
6324 /* For interleaved stores we collect vectorized defs for all the
6325 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6326 used as an input to vect_permute_store_chain(), and OPRNDS as
6327 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6329 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6330 OPRNDS are of size 1. */
6331 next_stmt = first_stmt;
6332 for (i = 0; i < group_size; i++)
6334 /* Since gaps are not supported for interleaved stores,
6335 GROUP_SIZE is the exact number of stmts in the chain.
6336 Therefore, NEXT_STMT can't be NULL. In case that
6337 there is no interleaving, GROUP_SIZE is 1, and only one
6338 iteration of the loop will be executed. */
6339 op = vect_get_store_rhs (next_stmt);
6340 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6341 dr_chain.quick_push (vec_oprnd);
6342 oprnds.quick_push (vec_oprnd);
6343 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6345 if (mask)
6346 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6347 mask_vectype);
6350 /* We should have caught mismatched types earlier. */
6351 gcc_assert (useless_type_conversion_p (vectype,
6352 TREE_TYPE (vec_oprnd)));
6353 bool simd_lane_access_p
6354 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6355 if (simd_lane_access_p
6356 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6357 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6358 && integer_zerop (DR_OFFSET (first_dr))
6359 && integer_zerop (DR_INIT (first_dr))
6360 && alias_sets_conflict_p (get_alias_set (aggr_type),
6361 get_alias_set (TREE_TYPE (ref_type))))
6363 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6364 dataref_offset = build_int_cst (ref_type, 0);
6365 inv_p = false;
6367 else
6368 dataref_ptr
6369 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6370 simd_lane_access_p ? loop : NULL,
6371 offset, &dummy, gsi, &ptr_incr,
6372 simd_lane_access_p, &inv_p);
6373 gcc_assert (bb_vinfo || !inv_p);
6375 else
6377 /* For interleaved stores we created vectorized defs for all the
6378 defs stored in OPRNDS in the previous iteration (previous copy).
6379 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6380 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6381 next copy.
6382 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6383 OPRNDS are of size 1. */
6384 for (i = 0; i < group_size; i++)
6386 op = oprnds[i];
6387 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6388 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6389 dr_chain[i] = vec_oprnd;
6390 oprnds[i] = vec_oprnd;
6392 if (mask)
6394 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6395 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6397 if (dataref_offset)
6398 dataref_offset
6399 = int_const_binop (PLUS_EXPR, dataref_offset,
6400 TYPE_SIZE_UNIT (aggr_type));
6401 else
6402 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6403 TYPE_SIZE_UNIT (aggr_type));
6406 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6408 tree vec_array;
6410 /* Combine all the vectors into an array. */
6411 vec_array = create_vector_array (vectype, vec_num);
6412 for (i = 0; i < vec_num; i++)
6414 vec_oprnd = dr_chain[i];
6415 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6418 /* Emit:
6419 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6420 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6421 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6422 vec_array);
6423 gimple_call_set_lhs (call, data_ref);
6424 gimple_call_set_nothrow (call, true);
6425 new_stmt = call;
6426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6428 else
6430 new_stmt = NULL;
6431 if (grouped_store)
6433 if (j == 0)
6434 result_chain.create (group_size);
6435 /* Permute. */
6436 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6437 &result_chain);
6440 next_stmt = first_stmt;
6441 for (i = 0; i < vec_num; i++)
6443 unsigned align, misalign;
6445 if (i > 0)
6446 /* Bump the vector pointer. */
6447 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6448 stmt, NULL_TREE);
6450 if (slp)
6451 vec_oprnd = vec_oprnds[i];
6452 else if (grouped_store)
6453 /* For grouped stores vectorized defs are interleaved in
6454 vect_permute_store_chain(). */
6455 vec_oprnd = result_chain[i];
6457 align = DR_TARGET_ALIGNMENT (first_dr);
6458 if (aligned_access_p (first_dr))
6459 misalign = 0;
6460 else if (DR_MISALIGNMENT (first_dr) == -1)
6462 align = dr_alignment (vect_dr_behavior (first_dr));
6463 misalign = 0;
6465 else
6466 misalign = DR_MISALIGNMENT (first_dr);
6467 if (dataref_offset == NULL_TREE
6468 && TREE_CODE (dataref_ptr) == SSA_NAME)
6469 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6470 misalign);
6472 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6474 tree perm_mask = perm_mask_for_reverse (vectype);
6475 tree perm_dest
6476 = vect_create_destination_var (vect_get_store_rhs (stmt),
6477 vectype);
6478 tree new_temp = make_ssa_name (perm_dest);
6480 /* Generate the permute statement. */
6481 gimple *perm_stmt
6482 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6483 vec_oprnd, perm_mask);
6484 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6486 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6487 vec_oprnd = new_temp;
6490 /* Arguments are ready. Create the new vector stmt. */
6491 if (mask)
6493 align = least_bit_hwi (misalign | align);
6494 tree ptr = build_int_cst (ref_type, align);
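/* PTR is a constant of the reference pointer type whose value is the
   guaranteed alignment in bytes; the masked-store internal function
   uses it to carry both alias and alignment information.  */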
6495 gcall *call
6496 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6497 dataref_ptr, ptr,
6498 vec_mask, vec_oprnd);
6499 gimple_call_set_nothrow (call, true);
6500 new_stmt = call;
6502 else
6504 data_ref = fold_build2 (MEM_REF, vectype,
6505 dataref_ptr,
6506 dataref_offset
6507 ? dataref_offset
6508 : build_int_cst (ref_type, 0));
6509 if (aligned_access_p (first_dr))
6511 else if (DR_MISALIGNMENT (first_dr) == -1)
6512 TREE_TYPE (data_ref)
6513 = build_aligned_type (TREE_TYPE (data_ref),
6514 align * BITS_PER_UNIT);
6515 else
6516 TREE_TYPE (data_ref)
6517 = build_aligned_type (TREE_TYPE (data_ref),
6518 TYPE_ALIGN (elem_type));
6519 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6523 if (slp)
6524 continue;
6526 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6527 if (!next_stmt)
6528 break;
6531 if (!slp)
6533 if (j == 0)
6534 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6535 else
6536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6537 prev_stmt_info = vinfo_for_stmt (new_stmt);
6541 oprnds.release ();
6542 result_chain.release ();
6543 vec_oprnds.release ();
6545 return true;
6548 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6549 VECTOR_CST mask. No checks are made that the target platform supports the
6550 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6551 vect_gen_perm_mask_checked. */
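/* For instance, SEL = { 1, 0, 3, 2 } on a four-element VECTYPE yields
   the VECTOR_CST { 1, 0, 3, 2 } of ssizetype elements, suitable as the
   third operand of a VEC_PERM_EXPR that swaps adjacent lanes.  */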
6553 tree
6554 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6556 tree mask_type;
6558 poly_uint64 nunits = sel.length ();
6559 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
6561 mask_type = build_vector_type (ssizetype, nunits);
6562 return vec_perm_indices_to_tree (mask_type, sel);
6565 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6566 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6568 tree
6569 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6571 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6572 return vect_gen_perm_mask_any (vectype, sel);
6575 /* Given vector variables X and Y that were generated for the scalar
6576 STMT, generate instructions to permute the vector elements of X and Y
6577 using permutation mask MASK_VEC, insert them at *GSI and return the
6578 permuted vector variable. */
6580 static tree
6581 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6582 gimple_stmt_iterator *gsi)
6584 tree vectype = TREE_TYPE (x);
6585 tree perm_dest, data_ref;
6586 gimple *perm_stmt;
6588 tree scalar_dest = gimple_get_lhs (stmt);
6589 if (TREE_CODE (scalar_dest) == SSA_NAME)
6590 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6591 else
6592 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
6593 data_ref = make_ssa_name (perm_dest);
6595 /* Generate the permute statement. */
6596 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6597 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6599 return data_ref;
6602 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6603 inserting them on the loop's preheader edge. Returns true if we
6604 were successful in doing so (and thus STMT can then be moved),
6605 otherwise returns false. */
6607 static bool
6608 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6610 ssa_op_iter i;
6611 tree op;
6612 bool any = false;
6614 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6616 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6617 if (!gimple_nop_p (def_stmt)
6618 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6620 /* Make sure we don't need to recurse. While we could do
6621 so in simple cases, when there are more complex use webs
6622 we don't have an easy way to preserve stmt order to fulfil
6623 dependencies within them. */
6624 tree op2;
6625 ssa_op_iter i2;
6626 if (gimple_code (def_stmt) == GIMPLE_PHI)
6627 return false;
6628 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6630 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6631 if (!gimple_nop_p (def_stmt2)
6632 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6633 return false;
6635 any = true;
6639 if (!any)
6640 return true;
6642 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6644 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6645 if (!gimple_nop_p (def_stmt)
6646 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6648 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6649 gsi_remove (&gsi, false);
6650 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6654 return true;
6657 /* vectorizable_load.
6659 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6660 can be vectorized.
6661 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6662 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6663 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6665 static bool
6666 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6667 slp_tree slp_node, slp_instance slp_node_instance)
6669 tree scalar_dest;
6670 tree vec_dest = NULL;
6671 tree data_ref = NULL;
6672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6673 stmt_vec_info prev_stmt_info;
6674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6675 struct loop *loop = NULL;
6676 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6677 bool nested_in_vect_loop = false;
6678 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6679 tree elem_type;
6680 tree new_temp;
6681 machine_mode mode;
6682 gimple *new_stmt = NULL;
6683 tree dummy;
6684 enum dr_alignment_support alignment_support_scheme;
6685 tree dataref_ptr = NULL_TREE;
6686 tree dataref_offset = NULL_TREE;
6687 gimple *ptr_incr = NULL;
6688 int ncopies;
6689 int i, j;
6690 unsigned int group_size;
6691 poly_uint64 group_gap_adj;
6692 tree msq = NULL_TREE, lsq;
6693 tree offset = NULL_TREE;
6694 tree byte_offset = NULL_TREE;
6695 tree realignment_token = NULL_TREE;
6696 gphi *phi = NULL;
6697 vec<tree> dr_chain = vNULL;
6698 bool grouped_load = false;
6699 gimple *first_stmt;
6700 gimple *first_stmt_for_drptr = NULL;
6701 bool inv_p;
6702 bool compute_in_loop = false;
6703 struct loop *at_loop;
6704 int vec_num;
6705 bool slp = (slp_node != NULL);
6706 bool slp_perm = false;
6707 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6708 poly_uint64 vf;
6709 tree aggr_type;
6710 gather_scatter_info gs_info;
6711 vec_info *vinfo = stmt_info->vinfo;
6712 tree ref_type;
6714 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6715 return false;
6717 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6718 && ! vec_stmt)
6719 return false;
6721 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6722 if (is_gimple_assign (stmt))
6724 scalar_dest = gimple_assign_lhs (stmt);
6725 if (TREE_CODE (scalar_dest) != SSA_NAME)
6726 return false;
6728 tree_code code = gimple_assign_rhs_code (stmt);
6729 if (code != ARRAY_REF
6730 && code != BIT_FIELD_REF
6731 && code != INDIRECT_REF
6732 && code != COMPONENT_REF
6733 && code != IMAGPART_EXPR
6734 && code != REALPART_EXPR
6735 && code != MEM_REF
6736 && TREE_CODE_CLASS (code) != tcc_declaration)
6737 return false;
6739 else
6741 gcall *call = dyn_cast <gcall *> (stmt);
6742 if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
6743 return false;
6745 scalar_dest = gimple_call_lhs (call);
6746 if (!scalar_dest)
6747 return false;
6749 if (slp_node != NULL)
6751 if (dump_enabled_p ())
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6753 "SLP of masked loads not supported.\n");
6754 return false;
6757 mask = gimple_call_arg (call, 2);
6758 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
6759 return false;
6762 if (!STMT_VINFO_DATA_REF (stmt_info))
6763 return false;
6765 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6766 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6768 if (loop_vinfo)
6770 loop = LOOP_VINFO_LOOP (loop_vinfo);
6771 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6772 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6774 else
6775 vf = 1;
6777 /* Multiple types in SLP are handled by creating the appropriate number of
6778 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6779 case of SLP. */
6780 if (slp)
6781 ncopies = 1;
6782 else
6783 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6785 gcc_assert (ncopies >= 1);
6787 /* FORNOW. This restriction should be relaxed. */
6788 if (nested_in_vect_loop && ncopies > 1)
6790 if (dump_enabled_p ())
6791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6792 "multiple types in nested loop.\n");
6793 return false;
6796 /* Invalidate assumptions made by dependence analysis when vectorization
6797 on the unrolled body effectively re-orders stmts. */
6798 if (ncopies > 1
6799 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6800 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6801 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6803 if (dump_enabled_p ())
6804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6805 "cannot perform implicit CSE when unrolling "
6806 "with negative dependence distance\n");
6807 return false;
6810 elem_type = TREE_TYPE (vectype);
6811 mode = TYPE_MODE (vectype);
6813 /* FORNOW. In some cases can vectorize even if data-type not supported
6814 (e.g. data copies). */
6815 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6817 if (dump_enabled_p ())
6818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6819 "Aligned load, but unsupported type.\n");
6820 return false;
6823 /* Check if the load is a part of an interleaving chain. */
6824 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6826 grouped_load = true;
6827 /* FORNOW */
6828 gcc_assert (!nested_in_vect_loop);
6829 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6831 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6832 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6834 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6835 slp_perm = true;
6837 /* Invalidate assumptions made by dependence analysis when vectorization
6838 on the unrolled body effectively re-orders stmts. */
6839 if (!PURE_SLP_STMT (stmt_info)
6840 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6841 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6842 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6844 if (dump_enabled_p ())
6845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6846 "cannot perform implicit CSE when performing "
6847 "group loads with negative dependence distance\n");
6848 return false;
6851 /* Similarly, when the stmt is a load that is both part of an SLP
6852 instance and a loop-vectorized stmt via the same-dr mechanism,
6853 we have to give up. */
6854 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6855 && (STMT_SLP_TYPE (stmt_info)
6856 != STMT_SLP_TYPE (vinfo_for_stmt
6857 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6859 if (dump_enabled_p ())
6860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6861 "conflicting SLP types for CSEd load\n");
6862 return false;
6866 vect_memory_access_type memory_access_type;
6867 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6868 &memory_access_type, &gs_info))
6869 return false;
6871 if (mask)
6873 if (memory_access_type == VMAT_CONTIGUOUS)
6875 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6876 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
6877 TYPE_MODE (mask_vectype), true))
6878 return false;
6880 else if (memory_access_type == VMAT_GATHER_SCATTER)
6882 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6883 tree masktype
6884 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
6885 if (TREE_CODE (masktype) == INTEGER_TYPE)
6887 if (dump_enabled_p ())
6888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6889 "masked gather with integer mask not"
6890 " supported.");
6891 return false;
6894 else
6896 if (dump_enabled_p ())
6897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6898 "unsupported access type for masked load.\n");
6899 return false;
6903 if (!vec_stmt) /* transformation not required. */
6905 if (!slp)
6906 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6907 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6908 /* The SLP costs are calculated during SLP analysis. */
6909 if (!PURE_SLP_STMT (stmt_info))
6910 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6911 NULL, NULL, NULL);
6912 return true;
6915 if (!slp)
6916 gcc_assert (memory_access_type
6917 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6919 if (dump_enabled_p ())
6920 dump_printf_loc (MSG_NOTE, vect_location,
6921 "transform load. ncopies = %d\n", ncopies);
6923 /* Transform. */
6925 ensure_base_align (dr);
6927 if (memory_access_type == VMAT_GATHER_SCATTER)
6929 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
6930 return true;
6933 if (memory_access_type == VMAT_ELEMENTWISE
6934 || memory_access_type == VMAT_STRIDED_SLP)
6936 gimple_stmt_iterator incr_gsi;
6937 bool insert_after;
6938 gimple *incr;
6939 tree offvar;
6940 tree ivstep;
6941 tree running_off;
6942 vec<constructor_elt, va_gc> *v = NULL;
6943 gimple_seq stmts = NULL;
6944 tree stride_base, stride_step, alias_off;
6945 /* Checked by get_load_store_type. */
6946 unsigned int const_nunits = nunits.to_constant ();
6948 gcc_assert (!nested_in_vect_loop);
6950 if (slp && grouped_load)
6952 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6953 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6954 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6955 ref_type = get_group_alias_ptr_type (first_stmt);
6957 else
6959 first_stmt = stmt;
6960 first_dr = dr;
6961 group_size = 1;
6962 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6965 stride_base
6966 = fold_build_pointer_plus
6967 (DR_BASE_ADDRESS (first_dr),
6968 size_binop (PLUS_EXPR,
6969 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6970 convert_to_ptrofftype (DR_INIT (first_dr))));
6971 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6973 /* For a load with loop-invariant (but other than power-of-2)
6974 stride (i.e. not a grouped access) like so:
6976 for (i = 0; i < n; i += stride)
6977 ... = array[i];
6979 we generate a new induction variable and new accesses to
6980 form a new vector (or vectors, depending on ncopies):
6982 for (j = 0; ; j += VF*stride)
6983 tmp1 = array[j];
6984 tmp2 = array[j + stride];
6986 vectemp = {tmp1, tmp2, ...}  */
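/* As a concrete illustration of the non-SLP path: for a V4SI result
   the loop below emits four scalar loads at RUNNING_OFF, RUNNING_OFF
   + DR_STEP, ... RUNNING_OFF + 3 * DR_STEP and combines them with a
   CONSTRUCTOR; when NLOADS == 1 a single full-vector load is emitted
   instead.  */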
6989 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6990 build_int_cst (TREE_TYPE (stride_step), vf));
6992 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6994 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6995 loop, &incr_gsi, insert_after,
6996 &offvar, NULL);
6997 incr = gsi_stmt (incr_gsi);
6998 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7000 stride_step = force_gimple_operand (unshare_expr (stride_step),
7001 &stmts, true, NULL_TREE);
7002 if (stmts)
7003 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7005 prev_stmt_info = NULL;
7006 running_off = offvar;
7007 alias_off = build_int_cst (ref_type, 0);
7008 int nloads = const_nunits;
7009 int lnel = 1;
7010 tree ltype = TREE_TYPE (vectype);
7011 tree lvectype = vectype;
7012 auto_vec<tree> dr_chain;
7013 if (memory_access_type == VMAT_STRIDED_SLP)
7015 if (group_size < const_nunits)
7017 /* First check if vec_init optab supports construction from
7018 vector elts directly. */
7019 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7020 machine_mode vmode;
7021 if (mode_for_vector (elmode, group_size).exists (&vmode)
7022 && VECTOR_MODE_P (vmode)
7023 && (convert_optab_handler (vec_init_optab,
7024 TYPE_MODE (vectype), vmode)
7025 != CODE_FOR_nothing))
7027 nloads = const_nunits / group_size;
7028 lnel = group_size;
7029 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7031 else
7033 /* Otherwise avoid emitting a constructor of vector elements
7034 by performing the loads using an integer type of the same
7035 size, constructing a vector of those and then
7036 re-interpreting it as the original vector type.
7037 This avoids a huge runtime penalty due to the general
7038 inability to perform store forwarding from smaller stores
7039 to a larger load. */
7040 unsigned lsize
7041 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7042 elmode = int_mode_for_size (lsize, 0).require ();
7043 unsigned int lnunits = const_nunits / group_size;
7044 /* If we can't construct such a vector fall back to
7045 element loads of the original vector type. */
7046 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7047 && VECTOR_MODE_P (vmode)
7048 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7049 != CODE_FOR_nothing))
7051 nloads = lnunits;
7052 lnel = group_size;
7053 ltype = build_nonstandard_integer_type (lsize, 1);
7054 lvectype = build_vector_type (ltype, nloads);
7058 else
7060 nloads = 1;
7061 lnel = const_nunits;
7062 ltype = vectype;
7064 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
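/* E.g. an SLP group of two HImode elements with a V8HImode VECTYPE:
   if V2HImode construction is not supported but a V4SImode vector can
   be built from SImode pieces, the loads are done as four 32-bit
   chunks (NLOADS = 4, LNEL = 2, LVECTYPE = V4SI) and the result is
   view-converted back to VECTYPE further below.  */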
7066 if (slp)
7068 /* For SLP permutation support we need to load the whole group,
7069 not only the number of vector stmts the permutation result
7070 fits in. */
7071 if (slp_perm)
7073 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7074 variable VF. */
7075 unsigned int const_vf = vf.to_constant ();
7076 ncopies = CEIL (group_size * const_vf, const_nunits);
7077 dr_chain.create (ncopies);
7079 else
7080 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7082 unsigned int group_el = 0;
7083 unsigned HOST_WIDE_INT
7084 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7085 for (j = 0; j < ncopies; j++)
7087 if (nloads > 1)
7088 vec_alloc (v, nloads);
7089 for (i = 0; i < nloads; i++)
7091 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7092 group_el * elsz);
7093 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7094 build2 (MEM_REF, ltype,
7095 running_off, this_off));
7096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7097 if (nloads > 1)
7098 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7099 gimple_assign_lhs (new_stmt));
7101 group_el += lnel;
7102 if (! slp
7103 || group_el == group_size)
7105 tree newoff = copy_ssa_name (running_off);
7106 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7107 running_off, stride_step);
7108 vect_finish_stmt_generation (stmt, incr, gsi);
7110 running_off = newoff;
7111 group_el = 0;
7114 if (nloads > 1)
7116 tree vec_inv = build_constructor (lvectype, v);
7117 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7118 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7119 if (lvectype != vectype)
7121 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7122 VIEW_CONVERT_EXPR,
7123 build1 (VIEW_CONVERT_EXPR,
7124 vectype, new_temp));
7125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7129 if (slp)
7131 if (slp_perm)
7132 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7133 else
7134 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7136 else
7138 if (j == 0)
7139 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7140 else
7141 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7142 prev_stmt_info = vinfo_for_stmt (new_stmt);
7145 if (slp_perm)
7147 unsigned n_perms;
7148 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7149 slp_node_instance, false, &n_perms);
7151 return true;
7154 if (grouped_load)
7156 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7157 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7158 /* For SLP vectorization we directly vectorize a subchain
7159 without permutation. */
7160 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7161 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7162 /* For BB vectorization always use the first stmt to base
7163 the data ref pointer on. */
7164 if (bb_vinfo)
7165 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7167 /* Check if the chain of loads is already vectorized. */
7168 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7169 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7170 ??? But we can only do so if there is exactly one
7171 as we have no way to get at the rest. Leave the CSE
7172 opportunity alone.
7173 ??? With the group load eventually participating
7174 in multiple different permutations (having multiple
7175 slp nodes which refer to the same group) the CSE
7176 is even wrong code. See PR56270. */
7177 && !slp)
7179 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7180 return true;
7182 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7183 group_gap_adj = 0;
7185 /* VEC_NUM is the number of vect stmts to be created for this group. */
7186 if (slp)
7188 grouped_load = false;
7189 /* For SLP permutation support we need to load the whole group,
7190 not only the number of vector stmts the permutation result
7191 fits in. */
7192 if (slp_perm)
7194 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7195 variable VF. */
7196 unsigned int const_vf = vf.to_constant ();
7197 unsigned int const_nunits = nunits.to_constant ();
7198 vec_num = CEIL (group_size * const_vf, const_nunits);
7199 group_gap_adj = vf * group_size - nunits * vec_num;
7201 else
7203 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7204 group_gap_adj
7205 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7208 else
7209 vec_num = group_size;
7211 ref_type = get_group_alias_ptr_type (first_stmt);
7213 else
7215 first_stmt = stmt;
7216 first_dr = dr;
7217 group_size = vec_num = 1;
7218 group_gap_adj = 0;
7219 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7222 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7223 gcc_assert (alignment_support_scheme);
7224 /* Targets with load-lane instructions must not require explicit
7225 realignment. */
7226 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7227 || alignment_support_scheme == dr_aligned
7228 || alignment_support_scheme == dr_unaligned_supported);
7230 /* In case the vectorization factor (VF) is bigger than the number
7231 of elements that we can fit in a vectype (nunits), we have to generate
7232 more than one vector stmt - i.e., we need to "unroll" the
7233 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7234 from one copy of the vector stmt to the next, in the field
7235 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7236 stages to find the correct vector defs to be used when vectorizing
7237 stmts that use the defs of the current stmt. The example below
7238 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7239 need to create 4 vectorized stmts):
7241 before vectorization:
7242 RELATED_STMT VEC_STMT
7243 S1: x = memref - -
7244 S2: z = x + 1 - -
7246 step 1: vectorize stmt S1:
7247 We first create the vector stmt VS1_0, and, as usual, record a
7248 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7249 Next, we create the vector stmt VS1_1, and record a pointer to
7250 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7251 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7252 stmts and pointers:
7253 RELATED_STMT VEC_STMT
7254 VS1_0: vx0 = memref0 VS1_1 -
7255 VS1_1: vx1 = memref1 VS1_2 -
7256 VS1_2: vx2 = memref2 VS1_3 -
7257 VS1_3: vx3 = memref3 - -
7258 S1: x = load - VS1_0
7259 S2: z = x + 1 - -
7261 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7262 information we recorded in RELATED_STMT field is used to vectorize
7263 stmt S2. */
7265 /* In case of interleaving (non-unit grouped access):
7267 S1: x2 = &base + 2
7268 S2: x0 = &base
7269 S3: x1 = &base + 1
7270 S4: x3 = &base + 3
7272 Vectorized loads are created in the order of memory accesses
7273 starting from the access of the first stmt of the chain:
7275 VS1: vx0 = &base
7276 VS2: vx1 = &base + vec_size*1
7277 VS3: vx3 = &base + vec_size*2
7278 VS4: vx4 = &base + vec_size*3
7280 Then permutation statements are generated:
7282 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7283 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7286 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7287 (the order of the data-refs in the output of vect_permute_load_chain
7288 corresponds to the order of scalar stmts in the interleaving chain - see
7289 the documentation of vect_permute_load_chain()).
7290 The generation of permutation stmts and recording them in
7291 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7293 In case of both multiple types and interleaving, the vector loads and
7294 permutation stmts above are created for every copy. The result vector
7295 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7296 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
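/* For example, with V4SImode vectors and a group of two loads the two
   masks above are { 0, 2, 4, 6 } and { 1, 3, 5, 7 }, i.e. the even and
   odd elements of the concatenation of vx0 and vx1.  */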
7298 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7299 on a target that supports unaligned accesses (dr_unaligned_supported)
7300 we generate the following code:
7301 p = initial_addr;
7302 indx = 0;
7303 loop {
7304 p = p + indx * vectype_size;
7305 vec_dest = *(p);
7306 indx = indx + 1;
7309 Otherwise, the data reference is potentially unaligned on a target that
7310 does not support unaligned accesses (dr_explicit_realign_optimized) -
7311 then generate the following code, in which the data in each iteration is
7312 obtained by two vector loads, one from the previous iteration, and one
7313 from the current iteration:
7314 p1 = initial_addr;
7315 msq_init = *(floor(p1))
7316 p2 = initial_addr + VS - 1;
7317 realignment_token = call target_builtin;
7318 indx = 0;
7319 loop {
7320 p2 = p2 + indx * vectype_size
7321 lsq = *(floor(p2))
7322 vec_dest = realign_load (msq, lsq, realignment_token)
7323 indx = indx + 1;
7324 msq = lsq;
7325 } */
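/* Here floor(p) denotes P rounded down to a vector-size boundary; MSQ
   and LSQ are the two aligned vectors straddling the unaligned
   address, and realign_load combines them under control of
   REALIGNMENT_TOKEN (e.g. a permute control vector on targets that
   need one).  */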
7327 /* If the misalignment remains the same throughout the execution of the
7328 loop, we can create the init_addr and permutation mask at the loop
7329 preheader. Otherwise, it needs to be created inside the loop.
7330 This can only occur when vectorizing memory accesses in the inner-loop
7331 nested within an outer-loop that is being vectorized. */
7333 if (nested_in_vect_loop
7334 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7335 GET_MODE_SIZE (TYPE_MODE (vectype))))
7337 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7338 compute_in_loop = true;
7341 if ((alignment_support_scheme == dr_explicit_realign_optimized
7342 || alignment_support_scheme == dr_explicit_realign)
7343 && !compute_in_loop)
7345 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7346 alignment_support_scheme, NULL_TREE,
7347 &at_loop);
7348 if (alignment_support_scheme == dr_explicit_realign_optimized)
7350 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7351 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7352 size_one_node);
7355 else
7356 at_loop = loop;
7358 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7359 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7361 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7362 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7363 else
7364 aggr_type = vectype;
7366 tree vec_mask = NULL_TREE;
7367 prev_stmt_info = NULL;
7368 poly_uint64 group_elt = 0;
7369 for (j = 0; j < ncopies; j++)
7371 /* 1. Create the vector or array pointer update chain. */
7372 if (j == 0)
7374 bool simd_lane_access_p
7375 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7376 if (simd_lane_access_p
7377 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7378 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7379 && integer_zerop (DR_OFFSET (first_dr))
7380 && integer_zerop (DR_INIT (first_dr))
7381 && alias_sets_conflict_p (get_alias_set (aggr_type),
7382 get_alias_set (TREE_TYPE (ref_type)))
7383 && (alignment_support_scheme == dr_aligned
7384 || alignment_support_scheme == dr_unaligned_supported))
7386 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7387 dataref_offset = build_int_cst (ref_type, 0);
7388 inv_p = false;
7390 else if (first_stmt_for_drptr
7391 && first_stmt != first_stmt_for_drptr)
7393 dataref_ptr
7394 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7395 at_loop, offset, &dummy, gsi,
7396 &ptr_incr, simd_lane_access_p,
7397 &inv_p, byte_offset);
7398 /* Adjust the pointer by the difference to first_stmt. */
7399 data_reference_p ptrdr
7400 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7401 tree diff = fold_convert (sizetype,
7402 size_binop (MINUS_EXPR,
7403 DR_INIT (first_dr),
7404 DR_INIT (ptrdr)));
7405 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7406 stmt, diff);
7408 else
7409 dataref_ptr
7410 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7411 offset, &dummy, gsi, &ptr_incr,
7412 simd_lane_access_p, &inv_p,
7413 byte_offset);
7414 if (mask)
7415 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7416 mask_vectype);
7418 else
7420 if (dataref_offset)
7421 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7422 TYPE_SIZE_UNIT (aggr_type));
7423 else
7424 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7425 TYPE_SIZE_UNIT (aggr_type));
7426 if (mask)
7428 gimple *def_stmt;
7429 vect_def_type dt;
7430 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7431 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7435 if (grouped_load || slp_perm)
7436 dr_chain.create (vec_num);
7438 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7440 tree vec_array;
7442 vec_array = create_vector_array (vectype, vec_num);
7444 /* Emit:
7445 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7446 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7447 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7448 data_ref);
7449 gimple_call_set_lhs (call, vec_array);
7450 gimple_call_set_nothrow (call, true);
7451 new_stmt = call;
7452 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7454 /* Extract each vector into an SSA_NAME. */
7455 for (i = 0; i < vec_num; i++)
7457 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7458 vec_array, i);
7459 dr_chain.quick_push (new_temp);
7462 /* Record the mapping between SSA_NAMEs and statements. */
7463 vect_record_grouped_load_vectors (stmt, dr_chain);
7465 else
7467 for (i = 0; i < vec_num; i++)
7469 if (i > 0)
7470 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7471 stmt, NULL_TREE);
7473 /* 2. Create the vector-load in the loop. */
7474 switch (alignment_support_scheme)
7476 case dr_aligned:
7477 case dr_unaligned_supported:
7479 unsigned int align, misalign;
7481 align = DR_TARGET_ALIGNMENT (dr);
7482 if (alignment_support_scheme == dr_aligned)
7484 gcc_assert (aligned_access_p (first_dr));
7485 misalign = 0;
7487 else if (DR_MISALIGNMENT (first_dr) == -1)
7489 align = dr_alignment (vect_dr_behavior (first_dr));
7490 misalign = 0;
7492 else
7493 misalign = DR_MISALIGNMENT (first_dr);
7494 if (dataref_offset == NULL_TREE
7495 && TREE_CODE (dataref_ptr) == SSA_NAME)
7496 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7497 align, misalign);
7499 if (mask)
7501 align = least_bit_hwi (misalign | align);
7502 tree ptr = build_int_cst (ref_type, align);
7503 gcall *call
7504 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7505 dataref_ptr, ptr,
7506 vec_mask);
7507 gimple_call_set_nothrow (call, true);
7508 new_stmt = call;
7509 data_ref = NULL_TREE;
7511 else
7513 data_ref
7514 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7515 dataref_offset
7516 ? dataref_offset
7517 : build_int_cst (ref_type, 0));
7518 if (alignment_support_scheme == dr_aligned)
7520 else if (DR_MISALIGNMENT (first_dr) == -1)
7521 TREE_TYPE (data_ref)
7522 = build_aligned_type (TREE_TYPE (data_ref),
7523 align * BITS_PER_UNIT);
7524 else
7525 TREE_TYPE (data_ref)
7526 = build_aligned_type (TREE_TYPE (data_ref),
7527 TYPE_ALIGN (elem_type));
7529 break;
7531 case dr_explicit_realign:
7533 tree ptr, bump;
7535 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7537 if (compute_in_loop)
7538 msq = vect_setup_realignment (first_stmt, gsi,
7539 &realignment_token,
7540 dr_explicit_realign,
7541 dataref_ptr, NULL);
7543 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7544 ptr = copy_ssa_name (dataref_ptr);
7545 else
7546 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7547 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7548 new_stmt = gimple_build_assign
7549 (ptr, BIT_AND_EXPR, dataref_ptr,
7550 build_int_cst
7551 (TREE_TYPE (dataref_ptr),
7552 -(HOST_WIDE_INT) align));
7553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7554 data_ref
7555 = build2 (MEM_REF, vectype, ptr,
7556 build_int_cst (ref_type, 0));
7557 vec_dest = vect_create_destination_var (scalar_dest,
7558 vectype);
7559 new_stmt = gimple_build_assign (vec_dest, data_ref);
7560 new_temp = make_ssa_name (vec_dest, new_stmt);
7561 gimple_assign_set_lhs (new_stmt, new_temp);
7562 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7563 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7565 msq = new_temp;
7567 bump = size_binop (MULT_EXPR, vs,
7568 TYPE_SIZE_UNIT (elem_type));
7569 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7570 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7571 new_stmt = gimple_build_assign
7572 (NULL_TREE, BIT_AND_EXPR, ptr,
7573 build_int_cst
7574 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7575 ptr = copy_ssa_name (ptr, new_stmt);
7576 gimple_assign_set_lhs (new_stmt, ptr);
7577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7578 data_ref
7579 = build2 (MEM_REF, vectype, ptr,
7580 build_int_cst (ref_type, 0));
7581 break;
7583 case dr_explicit_realign_optimized:
7585 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7586 new_temp = copy_ssa_name (dataref_ptr);
7587 else
7588 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7589 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7590 new_stmt = gimple_build_assign
7591 (new_temp, BIT_AND_EXPR, dataref_ptr,
7592 build_int_cst (TREE_TYPE (dataref_ptr),
7593 -(HOST_WIDE_INT) align));
7594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7595 data_ref
7596 = build2 (MEM_REF, vectype, new_temp,
7597 build_int_cst (ref_type, 0));
7598 break;
7600 default:
7601 gcc_unreachable ();
7603 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7604 /* DATA_REF is null if we've already built the statement. */
7605 if (data_ref)
7606 new_stmt = gimple_build_assign (vec_dest, data_ref);
7607 new_temp = make_ssa_name (vec_dest, new_stmt);
7608 gimple_set_lhs (new_stmt, new_temp);
7609 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7611 /* 3. Handle explicit realignment if necessary/supported.
7612 Create in loop:
7613 vec_dest = realign_load (msq, lsq, realignment_token) */
7614 if (alignment_support_scheme == dr_explicit_realign_optimized
7615 || alignment_support_scheme == dr_explicit_realign)
7617 lsq = gimple_assign_lhs (new_stmt);
7618 if (!realignment_token)
7619 realignment_token = dataref_ptr;
7620 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7621 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7622 msq, lsq, realignment_token);
7623 new_temp = make_ssa_name (vec_dest, new_stmt);
7624 gimple_assign_set_lhs (new_stmt, new_temp);
7625 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7627 if (alignment_support_scheme == dr_explicit_realign_optimized)
7629 gcc_assert (phi);
7630 if (i == vec_num - 1 && j == ncopies - 1)
7631 add_phi_arg (phi, lsq,
7632 loop_latch_edge (containing_loop),
7633 UNKNOWN_LOCATION);
7634 msq = lsq;
7638 /* 4. Handle invariant-load. */
7639 if (inv_p && !bb_vinfo)
7641 gcc_assert (!grouped_load);
7642 /* If we have versioned for aliasing or the loop doesn't
7643 have any data dependencies that would preclude this,
7644 then we are sure this is a loop invariant load and
7645 thus we can insert it on the preheader edge. */
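/* For example, for a load "x_1 = *p" whose address is loop-invariant,
   the scalar load is emitted on the preheader edge and its result is
   then splat into a vector by vect_init_vector, so the loop body only
   uses the broadcast vector.  */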
7646 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7647 && !nested_in_vect_loop
7648 && hoist_defs_of_uses (stmt, loop))
7650 if (dump_enabled_p ())
7652 dump_printf_loc (MSG_NOTE, vect_location,
7653 "hoisting out of the vectorized "
7654 "loop: ");
7655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7657 tree tem = copy_ssa_name (scalar_dest);
7658 gsi_insert_on_edge_immediate
7659 (loop_preheader_edge (loop),
7660 gimple_build_assign (tem,
7661 unshare_expr
7662 (gimple_assign_rhs1 (stmt))));
7663 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7664 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7665 set_vinfo_for_stmt (new_stmt,
7666 new_stmt_vec_info (new_stmt, vinfo));
7668 else
7670 gimple_stmt_iterator gsi2 = *gsi;
7671 gsi_next (&gsi2);
7672 new_temp = vect_init_vector (stmt, scalar_dest,
7673 vectype, &gsi2);
7674 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7678 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7680 tree perm_mask = perm_mask_for_reverse (vectype);
7681 new_temp = permute_vec_elements (new_temp, new_temp,
7682 perm_mask, stmt, gsi);
7683 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7686 /* Collect vector loads and later create their permutation in
7687 vect_transform_grouped_load (). */
7688 if (grouped_load || slp_perm)
7689 dr_chain.quick_push (new_temp);
7691 /* Store vector loads in the corresponding SLP_NODE. */
7692 if (slp && !slp_perm)
7693 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7695 /* With an SLP permutation we load the gaps as well; without one
7696 we need to skip the gaps once we have fully loaded all the
7697 elements. group_gap_adj is GROUP_SIZE here. */
7698 group_elt += nunits;
7699 if (maybe_ne (group_gap_adj, 0U)
7700 && !slp_perm
7701 && known_eq (group_elt, group_size - group_gap_adj))
7703 poly_wide_int bump_val
7704 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7705 * group_gap_adj);
7706 tree bump = wide_int_to_tree (sizetype, bump_val);
7707 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7708 stmt, bump);
7709 group_elt = 0;
7712 /* Bump the vector pointer to account for a gap or for excess
7713 elements loaded for a permuted SLP load. */
7714 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
7716 poly_wide_int bump_val
7717 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7718 * group_gap_adj);
7719 tree bump = wide_int_to_tree (sizetype, bump_val);
7720 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7721 stmt, bump);
7725 if (slp && !slp_perm)
7726 continue;
7728 if (slp_perm)
7730 unsigned n_perms;
7731 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7732 slp_node_instance, false,
7733 &n_perms))
7735 dr_chain.release ();
7736 return false;
7739 else
7741 if (grouped_load)
7743 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7744 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7745 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7747 else
7749 if (j == 0)
7750 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7751 else
7752 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7753 prev_stmt_info = vinfo_for_stmt (new_stmt);
7756 dr_chain.release ();
7759 return true;
7762 /* Function vect_is_simple_cond.
7764 Input:
7765 LOOP - the loop that is being vectorized.
7766 COND - Condition that is checked for simple use.
7768 Output:
7769 *COMP_VECTYPE - the vector type for the comparison.
7770 *DTS - The def types for the arguments of the comparison
7772 Returns whether a COND can be vectorized. Checks whether
7773 condition operands are supportable using vect_is_simple_use. */
7775 static bool
7776 vect_is_simple_cond (tree cond, vec_info *vinfo,
7777 tree *comp_vectype, enum vect_def_type *dts,
7778 tree vectype)
7780 tree lhs, rhs;
7781 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7783 /* Mask case. */
7784 if (TREE_CODE (cond) == SSA_NAME
7785 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7787 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7788 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7789 &dts[0], comp_vectype)
7790 || !*comp_vectype
7791 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7792 return false;
7793 return true;
7796 if (!COMPARISON_CLASS_P (cond))
7797 return false;
7799 lhs = TREE_OPERAND (cond, 0);
7800 rhs = TREE_OPERAND (cond, 1);
7802 if (TREE_CODE (lhs) == SSA_NAME)
7804 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7805 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7806 return false;
7808 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7809 || TREE_CODE (lhs) == FIXED_CST)
7810 dts[0] = vect_constant_def;
7811 else
7812 return false;
7814 if (TREE_CODE (rhs) == SSA_NAME)
7816 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7817 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7818 return false;
7820 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7821 || TREE_CODE (rhs) == FIXED_CST)
7822 dts[1] = vect_constant_def;
7823 else
7824 return false;
7826 if (vectype1 && vectype2
7827 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
7828 TYPE_VECTOR_SUBPARTS (vectype2)))
7829 return false;
7831 *comp_vectype = vectype1 ? vectype1 : vectype2;
7832 /* Invariant comparison. */
7833 if (! *comp_vectype)
7835 tree scalar_type = TREE_TYPE (lhs);
7836 /* If we can widen the comparison to match vectype do so. */
7837 if (INTEGRAL_TYPE_P (scalar_type)
7838 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7839 TYPE_SIZE (TREE_TYPE (vectype))))
7840 scalar_type = build_nonstandard_integer_type
7841 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7842 TYPE_UNSIGNED (scalar_type));
7843 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7846 return true;
7849 /* vectorizable_condition.
7851 Check if STMT is a conditional modify expression that can be vectorized.
7852 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7853 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7854 at GSI.
7856 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7857 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7858 the else clause if it is 2).
7860 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
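/* For illustration, a scalar statement such as
     a = (b < c) ? d : e;
   is vectorized (roughly) as
     vcmp = vb < vc;
     va = VEC_COND_EXPR <vcmp, vd, ve>;
   where vb, vc, vd and ve are the vectorized operands.  */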
7862 bool
7863 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7864 gimple **vec_stmt, tree reduc_def, int reduc_index,
7865 slp_tree slp_node)
7867 tree scalar_dest = NULL_TREE;
7868 tree vec_dest = NULL_TREE;
7869 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7870 tree then_clause, else_clause;
7871 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7872 tree comp_vectype = NULL_TREE;
7873 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7874 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7875 tree vec_compare;
7876 tree new_temp;
7877 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7878 enum vect_def_type dts[4]
7879 = {vect_unknown_def_type, vect_unknown_def_type,
7880 vect_unknown_def_type, vect_unknown_def_type};
7881 int ndts = 4;
7882 int ncopies;
7883 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7884 stmt_vec_info prev_stmt_info = NULL;
7885 int i, j;
7886 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7887 vec<tree> vec_oprnds0 = vNULL;
7888 vec<tree> vec_oprnds1 = vNULL;
7889 vec<tree> vec_oprnds2 = vNULL;
7890 vec<tree> vec_oprnds3 = vNULL;
7891 tree vec_cmp_type;
7892 bool masked = false;
7894 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7895 return false;
7897 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7899 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7900 return false;
7902 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7903 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7904 && reduc_def))
7905 return false;
7907 /* FORNOW: not yet supported. */
7908 if (STMT_VINFO_LIVE_P (stmt_info))
7910 if (dump_enabled_p ())
7911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7912 "value used after loop.\n");
7913 return false;
7917 /* Is vectorizable conditional operation? */
7918 if (!is_gimple_assign (stmt))
7919 return false;
7921 code = gimple_assign_rhs_code (stmt);
7923 if (code != COND_EXPR)
7924 return false;
7926 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7927 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7929 if (slp_node)
7930 ncopies = 1;
7931 else
7932 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7934 gcc_assert (ncopies >= 1);
7935 if (reduc_index && ncopies > 1)
7936 return false; /* FORNOW */
7938 cond_expr = gimple_assign_rhs1 (stmt);
7939 then_clause = gimple_assign_rhs2 (stmt);
7940 else_clause = gimple_assign_rhs3 (stmt);
7942 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7943 &comp_vectype, &dts[0], vectype)
7944 || !comp_vectype)
7945 return false;
7947 gimple *def_stmt;
7948 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7949 &vectype1))
7950 return false;
7951 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7952 &vectype2))
7953 return false;
7955 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7956 return false;
7958 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7959 return false;
7961 masked = !COMPARISON_CLASS_P (cond_expr);
7962 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7964 if (vec_cmp_type == NULL_TREE)
7965 return false;
7967 cond_code = TREE_CODE (cond_expr);
7968 if (!masked)
7970 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7971 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7974 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7976 /* Boolean values may have another representation in vectors
7977 and therefore we prefer bit operations over comparison for
7978 them (which also works for scalar masks). We store opcodes
7979 to use in bitop1 and bitop2. The statement is vectorized as
7980 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2),
7981 depending on the arity of bitop1 and bitop2. */
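/* For example, with boolean operands a > b is computed as a & ~b
   (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR), and a < b as
   b & ~a after swapping the comparison operands.  */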
7982 switch (cond_code)
7984 case GT_EXPR:
7985 bitop1 = BIT_NOT_EXPR;
7986 bitop2 = BIT_AND_EXPR;
7987 break;
7988 case GE_EXPR:
7989 bitop1 = BIT_NOT_EXPR;
7990 bitop2 = BIT_IOR_EXPR;
7991 break;
7992 case LT_EXPR:
7993 bitop1 = BIT_NOT_EXPR;
7994 bitop2 = BIT_AND_EXPR;
7995 std::swap (cond_expr0, cond_expr1);
7996 break;
7997 case LE_EXPR:
7998 bitop1 = BIT_NOT_EXPR;
7999 bitop2 = BIT_IOR_EXPR;
8000 std::swap (cond_expr0, cond_expr1);
8001 break;
8002 case NE_EXPR:
8003 bitop1 = BIT_XOR_EXPR;
8004 break;
8005 case EQ_EXPR:
8006 bitop1 = BIT_XOR_EXPR;
8007 bitop2 = BIT_NOT_EXPR;
8008 break;
8009 default:
8010 return false;
8012 cond_code = SSA_NAME;
8015 if (!vec_stmt)
8017 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8018 if (bitop1 != NOP_EXPR)
8020 machine_mode mode = TYPE_MODE (comp_vectype);
8021 optab optab;
8023 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8024 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8025 return false;
8027 if (bitop2 != NOP_EXPR)
8029 optab = optab_for_tree_code (bitop2, comp_vectype,
8030 optab_default);
8031 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8032 return false;
8035 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8036 cond_code))
8038 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8039 return true;
8041 return false;
8044 /* Transform. */
8046 if (!slp_node)
8048 vec_oprnds0.create (1);
8049 vec_oprnds1.create (1);
8050 vec_oprnds2.create (1);
8051 vec_oprnds3.create (1);
8054 /* Handle def. */
8055 scalar_dest = gimple_assign_lhs (stmt);
8056 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8058 /* Handle cond expr. */
8059 for (j = 0; j < ncopies; j++)
8061 gassign *new_stmt = NULL;
8062 if (j == 0)
8064 if (slp_node)
8066 auto_vec<tree, 4> ops;
8067 auto_vec<vec<tree>, 4> vec_defs;
8069 if (masked)
8070 ops.safe_push (cond_expr);
8071 else
8073 ops.safe_push (cond_expr0);
8074 ops.safe_push (cond_expr1);
8076 ops.safe_push (then_clause);
8077 ops.safe_push (else_clause);
8078 vect_get_slp_defs (ops, slp_node, &vec_defs);
8079 vec_oprnds3 = vec_defs.pop ();
8080 vec_oprnds2 = vec_defs.pop ();
8081 if (!masked)
8082 vec_oprnds1 = vec_defs.pop ();
8083 vec_oprnds0 = vec_defs.pop ();
8085 else
8087 gimple *gtemp;
8088 if (masked)
8090 vec_cond_lhs
8091 = vect_get_vec_def_for_operand (cond_expr, stmt,
8092 comp_vectype);
8093 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8094 &gtemp, &dts[0]);
8096 else
8098 vec_cond_lhs
8099 = vect_get_vec_def_for_operand (cond_expr0,
8100 stmt, comp_vectype);
8101 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8103 vec_cond_rhs
8104 = vect_get_vec_def_for_operand (cond_expr1,
8105 stmt, comp_vectype);
8106 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8108 if (reduc_index == 1)
8109 vec_then_clause = reduc_def;
8110 else
8112 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8113 stmt);
8114 vect_is_simple_use (then_clause, loop_vinfo,
8115 &gtemp, &dts[2]);
8117 if (reduc_index == 2)
8118 vec_else_clause = reduc_def;
8119 else
8121 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8122 stmt);
8123 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8127 else
8129 vec_cond_lhs
8130 = vect_get_vec_def_for_stmt_copy (dts[0],
8131 vec_oprnds0.pop ());
8132 if (!masked)
8133 vec_cond_rhs
8134 = vect_get_vec_def_for_stmt_copy (dts[1],
8135 vec_oprnds1.pop ());
8137 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8138 vec_oprnds2.pop ());
8139 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8140 vec_oprnds3.pop ());
8143 if (!slp_node)
8145 vec_oprnds0.quick_push (vec_cond_lhs);
8146 if (!masked)
8147 vec_oprnds1.quick_push (vec_cond_rhs);
8148 vec_oprnds2.quick_push (vec_then_clause);
8149 vec_oprnds3.quick_push (vec_else_clause);
8152 /* Arguments are ready. Create the new vector stmt. */
8153 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8155 vec_then_clause = vec_oprnds2[i];
8156 vec_else_clause = vec_oprnds3[i];
8158 if (masked)
8159 vec_compare = vec_cond_lhs;
8160 else
8162 vec_cond_rhs = vec_oprnds1[i];
8163 if (bitop1 == NOP_EXPR)
8164 vec_compare = build2 (cond_code, vec_cmp_type,
8165 vec_cond_lhs, vec_cond_rhs);
8166 else
8168 new_temp = make_ssa_name (vec_cmp_type);
8169 if (bitop1 == BIT_NOT_EXPR)
8170 new_stmt = gimple_build_assign (new_temp, bitop1,
8171 vec_cond_rhs);
8172 else
8173 new_stmt
8174 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8175 vec_cond_rhs);
8176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8177 if (bitop2 == NOP_EXPR)
8178 vec_compare = new_temp;
8179 else if (bitop2 == BIT_NOT_EXPR)
8181 /* Instead of doing ~x ? y : z do x ? z : y. */
8182 vec_compare = new_temp;
8183 std::swap (vec_then_clause, vec_else_clause);
8185 else
8187 vec_compare = make_ssa_name (vec_cmp_type);
8188 new_stmt
8189 = gimple_build_assign (vec_compare, bitop2,
8190 vec_cond_lhs, new_temp);
8191 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8195 new_temp = make_ssa_name (vec_dest);
8196 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8197 vec_compare, vec_then_clause,
8198 vec_else_clause);
8199 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8200 if (slp_node)
8201 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8204 if (slp_node)
8205 continue;
8207 if (j == 0)
8208 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8209 else
8210 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8212 prev_stmt_info = vinfo_for_stmt (new_stmt);
8215 vec_oprnds0.release ();
8216 vec_oprnds1.release ();
8217 vec_oprnds2.release ();
8218 vec_oprnds3.release ();
8220 return true;
8223 /* vectorizable_comparison.
8225 Check if STMT is a comparison expression that can be vectorized.
8226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8227 comparison, put it in VEC_STMT, and insert it at GSI.
8229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
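/* For illustration, a scalar comparison such as
     mask_1 = a_2 < b_3;
   is vectorized into a statement that produces a vector boolean
   (mask) value, e.g.
     vmask_1 = va_2 < vb_3;
   which can then feed a VEC_COND_EXPR or a masked operation.  */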
8231 static bool
8232 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8233 gimple **vec_stmt, tree reduc_def,
8234 slp_tree slp_node)
8236 tree lhs, rhs1, rhs2;
8237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8238 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8239 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8240 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8241 tree new_temp;
8242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8243 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8244 int ndts = 2;
8245 poly_uint64 nunits;
8246 int ncopies;
8247 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8248 stmt_vec_info prev_stmt_info = NULL;
8249 int i, j;
8250 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8251 vec<tree> vec_oprnds0 = vNULL;
8252 vec<tree> vec_oprnds1 = vNULL;
8253 gimple *def_stmt;
8254 tree mask_type;
8255 tree mask;
8257 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8258 return false;
8260 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8261 return false;
8263 mask_type = vectype;
8264 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8266 if (slp_node)
8267 ncopies = 1;
8268 else
8269 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8271 gcc_assert (ncopies >= 1);
8272 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8273 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8274 && reduc_def))
8275 return false;
8277 if (STMT_VINFO_LIVE_P (stmt_info))
8279 if (dump_enabled_p ())
8280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8281 "value used after loop.\n");
8282 return false;
8285 if (!is_gimple_assign (stmt))
8286 return false;
8288 code = gimple_assign_rhs_code (stmt);
8290 if (TREE_CODE_CLASS (code) != tcc_comparison)
8291 return false;
8293 rhs1 = gimple_assign_rhs1 (stmt);
8294 rhs2 = gimple_assign_rhs2 (stmt);
8296 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8297 &dts[0], &vectype1))
8298 return false;
8300 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8301 &dts[1], &vectype2))
8302 return false;
8304 if (vectype1 && vectype2
8305 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8306 TYPE_VECTOR_SUBPARTS (vectype2)))
8307 return false;
8309 vectype = vectype1 ? vectype1 : vectype2;
8311 /* Invariant comparison. */
8312 if (!vectype)
8314 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8315 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
8316 return false;
8318 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
8319 return false;
8321 /* Can't compare mask and non-mask types. */
8322 if (vectype1 && vectype2
8323 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8324 return false;
8326 /* Boolean values may have another representation in vectors
8327 and therefore we prefer bit operations over comparison for
8328 them (which also works for scalar masks). We store opcodes
8329 to use in bitop1 and bitop2. The statement is vectorized as
8330 BITOP2 (rhs1 BITOP1 rhs2) or
8331 rhs1 BITOP2 (BITOP1 rhs2),
8332 depending on the arity of bitop1 and bitop2. */
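/* For example, with boolean (mask) operands a >= b is computed as
   a | ~b (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR), and a == b
   as ~(a ^ b) (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */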
8333 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8335 if (code == GT_EXPR)
8337 bitop1 = BIT_NOT_EXPR;
8338 bitop2 = BIT_AND_EXPR;
8340 else if (code == GE_EXPR)
8342 bitop1 = BIT_NOT_EXPR;
8343 bitop2 = BIT_IOR_EXPR;
8345 else if (code == LT_EXPR)
8347 bitop1 = BIT_NOT_EXPR;
8348 bitop2 = BIT_AND_EXPR;
8349 std::swap (rhs1, rhs2);
8350 std::swap (dts[0], dts[1]);
8352 else if (code == LE_EXPR)
8354 bitop1 = BIT_NOT_EXPR;
8355 bitop2 = BIT_IOR_EXPR;
8356 std::swap (rhs1, rhs2);
8357 std::swap (dts[0], dts[1]);
8359 else
8361 bitop1 = BIT_XOR_EXPR;
8362 if (code == EQ_EXPR)
8363 bitop2 = BIT_NOT_EXPR;
8367 if (!vec_stmt)
8369 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8370 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8371 dts, ndts, NULL, NULL);
8372 if (bitop1 == NOP_EXPR)
8373 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8374 else
8376 machine_mode mode = TYPE_MODE (vectype);
8377 optab optab;
8379 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8380 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8381 return false;
8383 if (bitop2 != NOP_EXPR)
8385 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8386 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8387 return false;
8389 return true;
8393 /* Transform. */
8394 if (!slp_node)
8396 vec_oprnds0.create (1);
8397 vec_oprnds1.create (1);
8400 /* Handle def. */
8401 lhs = gimple_assign_lhs (stmt);
8402 mask = vect_create_destination_var (lhs, mask_type);
8404 /* Handle cmp expr. */
8405 for (j = 0; j < ncopies; j++)
8407 gassign *new_stmt = NULL;
8408 if (j == 0)
8410 if (slp_node)
8412 auto_vec<tree, 2> ops;
8413 auto_vec<vec<tree>, 2> vec_defs;
8415 ops.safe_push (rhs1);
8416 ops.safe_push (rhs2);
8417 vect_get_slp_defs (ops, slp_node, &vec_defs);
8418 vec_oprnds1 = vec_defs.pop ();
8419 vec_oprnds0 = vec_defs.pop ();
8421 else
8423 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8424 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8427 else
8429 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8430 vec_oprnds0.pop ());
8431 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8432 vec_oprnds1.pop ());
8435 if (!slp_node)
8437 vec_oprnds0.quick_push (vec_rhs1);
8438 vec_oprnds1.quick_push (vec_rhs2);
8441 /* Arguments are ready. Create the new vector stmt. */
8442 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8444 vec_rhs2 = vec_oprnds1[i];
8446 new_temp = make_ssa_name (mask);
8447 if (bitop1 == NOP_EXPR)
8449 new_stmt = gimple_build_assign (new_temp, code,
8450 vec_rhs1, vec_rhs2);
8451 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8453 else
8455 if (bitop1 == BIT_NOT_EXPR)
8456 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8457 else
8458 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8459 vec_rhs2);
8460 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8461 if (bitop2 != NOP_EXPR)
8463 tree res = make_ssa_name (mask);
8464 if (bitop2 == BIT_NOT_EXPR)
8465 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8466 else
8467 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8468 new_temp);
8469 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8472 if (slp_node)
8473 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8476 if (slp_node)
8477 continue;
8479 if (j == 0)
8480 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8481 else
8482 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8484 prev_stmt_info = vinfo_for_stmt (new_stmt);
8487 vec_oprnds0.release ();
8488 vec_oprnds1.release ();
8490 return true;
8493 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8494 can handle all live statements in the node. Otherwise return true
8495 if STMT is not live or if vectorizable_live_operation can handle it.
8496 GSI and VEC_STMT are as for vectorizable_live_operation. */
8498 static bool
8499 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8500 slp_tree slp_node, gimple **vec_stmt)
8502 if (slp_node)
8504 gimple *slp_stmt;
8505 unsigned int i;
8506 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8508 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8509 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8510 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8511 vec_stmt))
8512 return false;
8515 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8516 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8517 return false;
8519 return true;
8522 /* Make sure the statement is vectorizable. */
8524 bool
8525 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8526 slp_instance node_instance)
8528 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8529 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8530 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8531 bool ok;
8532 gimple *pattern_stmt;
8533 gimple_seq pattern_def_seq;
8535 if (dump_enabled_p ())
8537 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8538 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8541 if (gimple_has_volatile_ops (stmt))
8543 if (dump_enabled_p ())
8544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8545 "not vectorized: stmt has volatile operands\n");
8547 return false;
8550 /* Skip stmts that do not need to be vectorized. In loops this is expected
8551 to include:
8552 - the COND_EXPR which is the loop exit condition
8553 - any LABEL_EXPRs in the loop
8554 - computations that are used only for array indexing or loop control.
8555 In basic blocks we only analyze statements that are a part of some SLP
8556 instance, therefore, all the statements are relevant.
8558 A pattern statement needs to be analyzed instead of the original statement
8559 if the original statement is not relevant. Otherwise, we analyze both
8560 statements. In basic blocks we are called from some SLP instance
8561 traversal; there we don't analyze pattern stmts instead of the original
8562 ones, since the pattern stmts will already be part of an SLP instance. */
8564 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8565 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8566 && !STMT_VINFO_LIVE_P (stmt_info))
8568 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8569 && pattern_stmt
8570 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8571 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8573 /* Analyze PATTERN_STMT instead of the original stmt. */
8574 stmt = pattern_stmt;
8575 stmt_info = vinfo_for_stmt (pattern_stmt);
8576 if (dump_enabled_p ())
8578 dump_printf_loc (MSG_NOTE, vect_location,
8579 "==> examining pattern statement: ");
8580 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8583 else
8585 if (dump_enabled_p ())
8586 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8588 return true;
8591 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8592 && node == NULL
8593 && pattern_stmt
8594 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8595 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8597 /* Analyze PATTERN_STMT too. */
8598 if (dump_enabled_p ())
8600 dump_printf_loc (MSG_NOTE, vect_location,
8601 "==> examining pattern statement: ");
8602 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8605 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8606 node_instance))
8607 return false;
8610 if (is_pattern_stmt_p (stmt_info)
8611 && node == NULL
8612 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8614 gimple_stmt_iterator si;
8616 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8618 gimple *pattern_def_stmt = gsi_stmt (si);
8619 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8620 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8622 /* Analyze def stmt of STMT if it's a pattern stmt. */
8623 if (dump_enabled_p ())
8625 dump_printf_loc (MSG_NOTE, vect_location,
8626 "==> examining pattern def statement: ");
8627 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8630 if (!vect_analyze_stmt (pattern_def_stmt,
8631 need_to_vectorize, node, node_instance))
8632 return false;
8637 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8639 case vect_internal_def:
8640 break;
8642 case vect_reduction_def:
8643 case vect_nested_cycle:
8644 gcc_assert (!bb_vinfo
8645 && (relevance == vect_used_in_outer
8646 || relevance == vect_used_in_outer_by_reduction
8647 || relevance == vect_used_by_reduction
8648 || relevance == vect_unused_in_scope
8649 || relevance == vect_used_only_live));
8650 break;
8652 case vect_induction_def:
8653 gcc_assert (!bb_vinfo);
8654 break;
8656 case vect_constant_def:
8657 case vect_external_def:
8658 case vect_unknown_def_type:
8659 default:
8660 gcc_unreachable ();
8663 if (STMT_VINFO_RELEVANT_P (stmt_info))
8665 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8666 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8667 || (is_gimple_call (stmt)
8668 && gimple_call_lhs (stmt) == NULL_TREE));
8669 *need_to_vectorize = true;
8672 if (PURE_SLP_STMT (stmt_info) && !node)
8674 dump_printf_loc (MSG_NOTE, vect_location,
8675 "handled only by SLP analysis\n");
8676 return true;
8679 ok = true;
8680 if (!bb_vinfo
8681 && (STMT_VINFO_RELEVANT_P (stmt_info)
8682 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8683 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8684 || vectorizable_conversion (stmt, NULL, NULL, node)
8685 || vectorizable_shift (stmt, NULL, NULL, node)
8686 || vectorizable_operation (stmt, NULL, NULL, node)
8687 || vectorizable_assignment (stmt, NULL, NULL, node)
8688 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8689 || vectorizable_call (stmt, NULL, NULL, node)
8690 || vectorizable_store (stmt, NULL, NULL, node)
8691 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8692 || vectorizable_induction (stmt, NULL, NULL, node)
8693 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8694 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8695 else
8697 if (bb_vinfo)
8698 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8699 || vectorizable_conversion (stmt, NULL, NULL, node)
8700 || vectorizable_shift (stmt, NULL, NULL, node)
8701 || vectorizable_operation (stmt, NULL, NULL, node)
8702 || vectorizable_assignment (stmt, NULL, NULL, node)
8703 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8704 || vectorizable_call (stmt, NULL, NULL, node)
8705 || vectorizable_store (stmt, NULL, NULL, node)
8706 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8707 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8710 if (!ok)
8712 if (dump_enabled_p ())
8714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8715 "not vectorized: relevant stmt not ");
8716 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8717 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8720 return false;
8723 if (bb_vinfo)
8724 return true;
8726 /* Stmts that are (also) "live" (i.e. that are used out of the loop)
8727 need extra handling, except for vectorizable reductions. */
8728 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8729 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8731 if (dump_enabled_p ())
8733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8734 "not vectorized: live stmt not supported: ");
8735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8738 return false;
8741 return true;
8745 /* Function vect_transform_stmt.
8747 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8749 bool
8750 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8751 bool *grouped_store, slp_tree slp_node,
8752 slp_instance slp_node_instance)
8754 bool is_store = false;
8755 gimple *vec_stmt = NULL;
8756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8757 bool done;
8759 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8760 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8762 switch (STMT_VINFO_TYPE (stmt_info))
8764 case type_demotion_vec_info_type:
8765 case type_promotion_vec_info_type:
8766 case type_conversion_vec_info_type:
8767 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8768 gcc_assert (done);
8769 break;
8771 case induc_vec_info_type:
8772 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8773 gcc_assert (done);
8774 break;
8776 case shift_vec_info_type:
8777 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8778 gcc_assert (done);
8779 break;
8781 case op_vec_info_type:
8782 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8783 gcc_assert (done);
8784 break;
8786 case assignment_vec_info_type:
8787 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8788 gcc_assert (done);
8789 break;
8791 case load_vec_info_type:
8792 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8793 slp_node_instance);
8794 gcc_assert (done);
8795 break;
8797 case store_vec_info_type:
8798 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8799 gcc_assert (done);
8800 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8802 /* In case of interleaving, the whole chain is vectorized when the
8803 last store in the chain is reached. Store stmts before the last
8804 one are skipped, and their stmt_vec_info shouldn't be freed
8805 meanwhile. */
8806 *grouped_store = true;
8807 if (STMT_VINFO_VEC_STMT (stmt_info))
8808 is_store = true;
8810 else
8811 is_store = true;
8812 break;
8814 case condition_vec_info_type:
8815 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8816 gcc_assert (done);
8817 break;
8819 case comparison_vec_info_type:
8820 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8821 gcc_assert (done);
8822 break;
8824 case call_vec_info_type:
8825 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8826 stmt = gsi_stmt (*gsi);
8827 break;
8829 case call_simd_clone_vec_info_type:
8830 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8831 stmt = gsi_stmt (*gsi);
8832 break;
8834 case reduc_vec_info_type:
8835 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8836 slp_node_instance);
8837 gcc_assert (done);
8838 break;
8840 default:
8841 if (!STMT_VINFO_LIVE_P (stmt_info))
8843 if (dump_enabled_p ())
8844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8845 "stmt not supported.\n");
8846 gcc_unreachable ();
8850 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8851 This would break hybrid SLP vectorization. */
8852 if (slp_node)
8853 gcc_assert (!vec_stmt
8854 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8856 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8857 is being vectorized, but outside the immediately enclosing loop. */
8858 if (vec_stmt
8859 && STMT_VINFO_LOOP_VINFO (stmt_info)
8860 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8861 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8862 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8863 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8864 || STMT_VINFO_RELEVANT (stmt_info) ==
8865 vect_used_in_outer_by_reduction))
8867 struct loop *innerloop = LOOP_VINFO_LOOP (
8868 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8869 imm_use_iterator imm_iter;
8870 use_operand_p use_p;
8871 tree scalar_dest;
8872 gimple *exit_phi;
8874 if (dump_enabled_p ())
8875 dump_printf_loc (MSG_NOTE, vect_location,
8876 "Record the vdef for outer-loop vectorization.\n");
8878 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8879 (to be used when vectorizing outer-loop stmts that use the DEF of
8880 STMT). */
8881 if (gimple_code (stmt) == GIMPLE_PHI)
8882 scalar_dest = PHI_RESULT (stmt);
8883 else
8884 scalar_dest = gimple_assign_lhs (stmt);
8886 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8888 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8890 exit_phi = USE_STMT (use_p);
8891 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8896 /* Handle stmts whose DEF is used outside the loop-nest that is
8897 being vectorized. */
8898 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8900 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8901 gcc_assert (done);
8904 if (vec_stmt)
8905 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8907 return is_store;
8911 /* Remove a group of stores (for SLP or interleaving), free their
8912 stmt_vec_info. */
8914 void
8915 vect_remove_stores (gimple *first_stmt)
8917 gimple *next = first_stmt;
8918 gimple *tmp;
8919 gimple_stmt_iterator next_si;
8921 while (next)
8923 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8925 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8926 if (is_pattern_stmt_p (stmt_info))
8927 next = STMT_VINFO_RELATED_STMT (stmt_info);
8928 /* Free the attached stmt_vec_info and remove the stmt. */
8929 next_si = gsi_for_stmt (next);
8930 unlink_stmt_vdef (next);
8931 gsi_remove (&next_si, true);
8932 release_defs (next);
8933 free_stmt_vec_info (next);
8934 next = tmp;
8939 /* Function new_stmt_vec_info.
8941 Create and initialize a new stmt_vec_info struct for STMT. */
8943 stmt_vec_info
8944 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8946 stmt_vec_info res;
8947 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8949 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8950 STMT_VINFO_STMT (res) = stmt;
8951 res->vinfo = vinfo;
8952 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8953 STMT_VINFO_LIVE_P (res) = false;
8954 STMT_VINFO_VECTYPE (res) = NULL;
8955 STMT_VINFO_VEC_STMT (res) = NULL;
8956 STMT_VINFO_VECTORIZABLE (res) = true;
8957 STMT_VINFO_IN_PATTERN_P (res) = false;
8958 STMT_VINFO_RELATED_STMT (res) = NULL;
8959 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8960 STMT_VINFO_DATA_REF (res) = NULL;
8961 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8962 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8964 if (gimple_code (stmt) == GIMPLE_PHI
8965 && is_loop_header_bb_p (gimple_bb (stmt)))
8966 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8967 else
8968 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8970 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8971 STMT_SLP_TYPE (res) = loop_vect;
8972 STMT_VINFO_NUM_SLP_USES (res) = 0;
8974 GROUP_FIRST_ELEMENT (res) = NULL;
8975 GROUP_NEXT_ELEMENT (res) = NULL;
8976 GROUP_SIZE (res) = 0;
8977 GROUP_STORE_COUNT (res) = 0;
8978 GROUP_GAP (res) = 0;
8979 GROUP_SAME_DR_STMT (res) = NULL;
8981 return res;
8985 /* Create the vector that holds the stmt_vec_info structs. */
8987 void
8988 init_stmt_vec_info_vec (void)
8990 gcc_assert (!stmt_vec_info_vec.exists ());
8991 stmt_vec_info_vec.create (50);
8995 /* Free the stmt_vec_info vector. */
8997 void
8998 free_stmt_vec_info_vec (void)
9000 unsigned int i;
9001 stmt_vec_info info;
9002 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9003 if (info != NULL)
9004 free_stmt_vec_info (STMT_VINFO_STMT (info));
9005 gcc_assert (stmt_vec_info_vec.exists ());
9006 stmt_vec_info_vec.release ();
9010 /* Free stmt vectorization related info. */
9012 void
9013 free_stmt_vec_info (gimple *stmt)
9015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9017 if (!stmt_info)
9018 return;
9020 /* Check if this statement has a related "pattern stmt"
9021 (introduced by the vectorizer during the pattern recognition
9022 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9023 too. */
9024 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9026 stmt_vec_info patt_info
9027 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9028 if (patt_info)
9030 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9031 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9032 gimple_set_bb (patt_stmt, NULL);
9033 tree lhs = gimple_get_lhs (patt_stmt);
9034 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9035 release_ssa_name (lhs);
9036 if (seq)
9038 gimple_stmt_iterator si;
9039 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9041 gimple *seq_stmt = gsi_stmt (si);
9042 gimple_set_bb (seq_stmt, NULL);
9043 lhs = gimple_get_lhs (seq_stmt);
9044 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9045 release_ssa_name (lhs);
9046 free_stmt_vec_info (seq_stmt);
9049 free_stmt_vec_info (patt_stmt);
9053 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9054 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9055 set_vinfo_for_stmt (stmt, NULL);
9056 free (stmt_info);
9060 /* Function get_vectype_for_scalar_type_and_size.
9062 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9063 by the target. */
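/* For example, on a target whose preferred SIMD width is 16 bytes
   (an assumption used only for illustration), SCALAR_TYPE 'int'
   (4 bytes) with SIZE 16 yields a 4-element integer vector type,
   while SIZE 0 means "use the vector mode the target prefers".  */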
9065 static tree
9066 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9068 tree orig_scalar_type = scalar_type;
9069 scalar_mode inner_mode;
9070 machine_mode simd_mode;
9071 poly_uint64 nunits;
9072 tree vectype;
9074 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9075 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9076 return NULL_TREE;
9078 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9080 /* For vector types of elements whose mode precision doesn't
9081 match their type's precision we use an element type of mode
9082 precision. The vectorization routines will have to make sure
9083 they support the proper result truncation/extension.
9084 We also make sure to build vector types with INTEGER_TYPE
9085 component type only. */
9086 if (INTEGRAL_TYPE_P (scalar_type)
9087 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9088 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9089 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9090 TYPE_UNSIGNED (scalar_type));
9092 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9093 When the component mode passes the above test simply use a type
9094 corresponding to that mode. The theory is that any use that
9095 would cause problems with this will disable vectorization anyway. */
9096 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9097 && !INTEGRAL_TYPE_P (scalar_type))
9098 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9100 /* We can't build a vector type of elements with alignment bigger than
9101 their size. */
9102 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9103 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9104 TYPE_UNSIGNED (scalar_type));
9106 /* If we fell back to using the mode, fail if there was
9107 no scalar type for it. */
9108 if (scalar_type == NULL_TREE)
9109 return NULL_TREE;
9111 /* If no size was supplied use the mode the target prefers. Otherwise
9112 look up a vector mode of the specified size. */
9113 if (known_eq (size, 0U))
9114 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9115 else if (!multiple_p (size, nbytes, &nunits)
9116 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9117 return NULL_TREE;
9118 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9119 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9120 return NULL_TREE;
9122 vectype = build_vector_type (scalar_type, nunits);
9124 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9125 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9126 return NULL_TREE;
9128 /* Re-attach the address-space qualifier if we canonicalized the scalar
9129 type. */
9130 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9131 return build_qualified_type
9132 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9134 return vectype;
9137 poly_uint64 current_vector_size;
9139 /* Function get_vectype_for_scalar_type.
9141 Returns the vector type corresponding to SCALAR_TYPE as supported
9142 by the target. */
9144 tree
9145 get_vectype_for_scalar_type (tree scalar_type)
9147 tree vectype;
9148 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9149 current_vector_size);
9150 if (vectype
9151 && known_eq (current_vector_size, 0U))
9152 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9153 return vectype;
9156 /* Function get_mask_type_for_scalar_type.
9158 Returns the mask type corresponding to a result of comparison
9159 of vectors of specified SCALAR_TYPE as supported by target. */
9161 tree
9162 get_mask_type_for_scalar_type (tree scalar_type)
9164 tree vectype = get_vectype_for_scalar_type (scalar_type);
9166 if (!vectype)
9167 return NULL;
9169 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9170 current_vector_size);
9173 /* Function get_same_sized_vectype
9175 Returns a vector type corresponding to SCALAR_TYPE of size
9176 VECTOR_TYPE if supported by the target. */
9178 tree
9179 get_same_sized_vectype (tree scalar_type, tree vector_type)
9181 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9182 return build_same_sized_truth_vector_type (vector_type);
9184 return get_vectype_for_scalar_type_and_size
9185 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9188 /* Function vect_is_simple_use.
9190 Input:
9191 VINFO - the vect info of the loop or basic block that is being vectorized.
9192 OPERAND - operand in the loop or bb.
9193 Output:
9194 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9195 DT - the type of definition
9197 Returns whether a stmt with OPERAND can be vectorized.
9198 For loops, supportable operands are constants, loop invariants, and operands
9199 that are defined by the current iteration of the loop. Unsupportable
9200 operands are those that are defined by a previous iteration of the loop (as
9201 is the case in reduction/induction computations).
9202 For basic blocks, supportable operands are constants and bb invariants.
9203 For now, operands defined outside the basic block are not supported. */
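/* For example: a constant operand such as 5 is classified as
   vect_constant_def, an SSA name defined outside the region being
   vectorized (or a default definition) as vect_external_def, and an
   SSA name defined by a statement inside the region gets the def
   type recorded for that statement (typically vect_internal_def).  */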
9205 bool
9206 vect_is_simple_use (tree operand, vec_info *vinfo,
9207 gimple **def_stmt, enum vect_def_type *dt)
9209 *def_stmt = NULL;
9210 *dt = vect_unknown_def_type;
9212 if (dump_enabled_p ())
9214 dump_printf_loc (MSG_NOTE, vect_location,
9215 "vect_is_simple_use: operand ");
9216 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9217 dump_printf (MSG_NOTE, "\n");
9220 if (CONSTANT_CLASS_P (operand))
9222 *dt = vect_constant_def;
9223 return true;
9226 if (is_gimple_min_invariant (operand))
9228 *dt = vect_external_def;
9229 return true;
9232 if (TREE_CODE (operand) != SSA_NAME)
9234 if (dump_enabled_p ())
9235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9236 "not ssa-name.\n");
9237 return false;
9240 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9242 *dt = vect_external_def;
9243 return true;
9246 *def_stmt = SSA_NAME_DEF_STMT (operand);
9247 if (dump_enabled_p ())
9249 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9250 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9253 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9254 *dt = vect_external_def;
9255 else
9257 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9258 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9261 if (dump_enabled_p ())
9263 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9264 switch (*dt)
9266 case vect_uninitialized_def:
9267 dump_printf (MSG_NOTE, "uninitialized\n");
9268 break;
9269 case vect_constant_def:
9270 dump_printf (MSG_NOTE, "constant\n");
9271 break;
9272 case vect_external_def:
9273 dump_printf (MSG_NOTE, "external\n");
9274 break;
9275 case vect_internal_def:
9276 dump_printf (MSG_NOTE, "internal\n");
9277 break;
9278 case vect_induction_def:
9279 dump_printf (MSG_NOTE, "induction\n");
9280 break;
9281 case vect_reduction_def:
9282 dump_printf (MSG_NOTE, "reduction\n");
9283 break;
9284 case vect_double_reduction_def:
9285 dump_printf (MSG_NOTE, "double reduction\n");
9286 break;
9287 case vect_nested_cycle:
9288 dump_printf (MSG_NOTE, "nested cycle\n");
9289 break;
9290 case vect_unknown_def_type:
9291 dump_printf (MSG_NOTE, "unknown\n");
9292 break;
9296 if (*dt == vect_unknown_def_type)
9298 if (dump_enabled_p ())
9299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9300 "Unsupported pattern.\n");
9301 return false;
9304 switch (gimple_code (*def_stmt))
9306 case GIMPLE_PHI:
9307 case GIMPLE_ASSIGN:
9308 case GIMPLE_CALL:
9309 break;
9310 default:
9311 if (dump_enabled_p ())
9312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9313 "unsupported defining stmt:\n");
9314 return false;
9317 return true;
9320 /* Function vect_is_simple_use.
9322 Same as vect_is_simple_use but also determines the vector operand
9323 type of OPERAND and stores it to *VECTYPE. If the definition of
9324 OPERAND is vect_uninitialized_def, vect_constant_def or
9325 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9326 is responsible for computing the best suited vector type for the
9327 scalar operand. */
9329 bool
9330 vect_is_simple_use (tree operand, vec_info *vinfo,
9331 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9333 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9334 return false;
9336 /* Now get a vector type if the def is internal, otherwise supply
9337 NULL_TREE and leave it up to the caller to figure out a proper
9338 type for the use stmt. */
9339 if (*dt == vect_internal_def
9340 || *dt == vect_induction_def
9341 || *dt == vect_reduction_def
9342 || *dt == vect_double_reduction_def
9343 || *dt == vect_nested_cycle)
9345 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9347 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9348 && !STMT_VINFO_RELEVANT (stmt_info)
9349 && !STMT_VINFO_LIVE_P (stmt_info))
9350 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9352 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9353 gcc_assert (*vectype != NULL_TREE);
9355 else if (*dt == vect_uninitialized_def
9356 || *dt == vect_constant_def
9357 || *dt == vect_external_def)
9358 *vectype = NULL_TREE;
9359 else
9360 gcc_unreachable ();
9362 return true;
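/* Usage sketch for the overload above (illustrative only, not part of
   this file's interface contract): a typical caller queries the operand
   and, when *VECTYPE comes back as NULL_TREE (constant or external def),
   derives a vector type from the scalar type itself:

     gimple *def_stmt;
     enum vect_def_type dt;
     tree vectype;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &vectype))
       return false;
     if (vectype == NULL_TREE)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   Here OP stands for some operand of the statement being analyzed and is
   hypothetical in this sketch; get_vectype_for_scalar_type is the
   existing vectorizer helper for choosing a type from a scalar type.  */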
9366 /* Function supportable_widening_operation
9368 Check whether an operation represented by the code CODE is a
9369 widening operation that is supported by the target platform in
9370 vector form (i.e., when operating on arguments of type VECTYPE_IN
9371 producing a result of type VECTYPE_OUT).
9373 Widening operations we currently support are NOP (CONVERT), FLOAT,
9374 WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
9375 are supported by the target platform either directly (via vector
9376 tree-codes), or via target builtins.
9378 Output:
9379 - CODE1 and CODE2 are codes of vector operations to be used when
9380 vectorizing the operation, if available.
9381 - MULTI_STEP_CVT determines the number of required intermediate steps in
9382 case of multi-step conversion (like char->short->int - in that case
9383 MULTI_STEP_CVT will be 1).
9384 - INTERM_TYPES contains the intermediate type required to perform the
9385 widening operation (short in the above example). */
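/* Worked example (assuming a target with 128-bit V16QI/V8HI/V4SI vector
   modes): converting a vector of chars to a vector of ints goes
   char->short->int, so on success *CODE1 and *CODE2 are
   VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR (possibly swapped on
   big-endian targets), *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the
   single short vector type used in between.  */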
9387 bool
9388 supportable_widening_operation (enum tree_code code, gimple *stmt,
9389 tree vectype_out, tree vectype_in,
9390 enum tree_code *code1, enum tree_code *code2,
9391 int *multi_step_cvt,
9392 vec<tree> *interm_types)
9394 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9395 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9396 struct loop *vect_loop = NULL;
9397 machine_mode vec_mode;
9398 enum insn_code icode1, icode2;
9399 optab optab1, optab2;
9400 tree vectype = vectype_in;
9401 tree wide_vectype = vectype_out;
9402 enum tree_code c1, c2;
9403 int i;
9404 tree prev_type, intermediate_type;
9405 machine_mode intermediate_mode, prev_mode;
9406 optab optab3, optab4;
9408 *multi_step_cvt = 0;
9409 if (loop_info)
9410 vect_loop = LOOP_VINFO_LOOP (loop_info);
9412 switch (code)
9414 case WIDEN_MULT_EXPR:
9415 /* The result of a vectorized widening operation usually requires
9416 two vectors (because the widened results do not fit into one vector).
9417 The generated vector results would normally be expected to appear
9418 in the same order as in the original scalar computation,
9419 i.e. if 8 results are generated in each vector iteration, they are
9420 to be organized as follows:
9421 vect1: [res1,res2,res3,res4],
9422 vect2: [res5,res6,res7,res8].
9424 However, in the special case that the result of the widening
9425 operation is used in a reduction computation only, the order doesn't
9426 matter (because when vectorizing a reduction we change the order of
9427 the computation).  Some targets can take advantage of this and
9428 generate more efficient code.  For example, targets such as Altivec,
9429 which support widen_mult using a sequence of {mult_even,mult_odd},
9430 generate the following vectors:
9431 vect1: [res1,res3,res5,res7],
9432 vect2: [res2,res4,res6,res8].
9434 When vectorizing outer-loops, we execute the inner-loop sequentially
9435 (each vectorized inner-loop iteration contributes to VF outer-loop
9436 iterations in parallel).  We therefore do not allow changing the
9437 order of the computation in the inner-loop during outer-loop
9438 vectorization.  */
9439 /* TODO: Another case in which order doesn't *really* matter is when we
9440 widen and then contract again, e.g. (short)((int)x * y >> 8).
9441 Normally, pack_trunc performs an even/odd permute, whereas the
9442 repack from an even/odd expansion would be an interleave, which
9443 would be significantly simpler for e.g. AVX2. */
9444 /* In any case, in order to avoid duplicating the code below, recurse
9445 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9446 are properly set up for the caller. If we fail, we'll continue with
9447 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9448 if (vect_loop
9449 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9450 && !nested_in_vect_loop_p (vect_loop, stmt)
9451 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9452 stmt, vectype_out, vectype_in,
9453 code1, code2, multi_step_cvt,
9454 interm_types))
9456 /* Elements in a vector with the vect_used_by_reduction property cannot
9457 be reordered if the use chain with this property does not have the
9458 same operation.  One such example is s += a * b, where elements
9459 in a and b cannot be reordered.  Here we check that the vector defined
9460 by STMT is used directly only in the reduction statement; a sketch follows.  */
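	/* A minimal GIMPLE-like sketch of the accepted shape (the SSA
	   names are hypothetical):

	     prod_1 = a_2 w* b_3;          <-- STMT, defines LHS
	     sum_4 = prod_1 + sum_5;       <-- sole immediate use, marked
	                                       vect_reduction_def

	   Any other immediate use of prod_1 falls through to the ordered
	   VEC_WIDEN_MULT_LO/HI_EXPR handling below.  */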
9461 tree lhs = gimple_assign_lhs (stmt);
9462 use_operand_p dummy;
9463 gimple *use_stmt;
9464 stmt_vec_info use_stmt_info = NULL;
9465 if (single_imm_use (lhs, &dummy, &use_stmt)
9466 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9467 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9468 return true;
9470 c1 = VEC_WIDEN_MULT_LO_EXPR;
9471 c2 = VEC_WIDEN_MULT_HI_EXPR;
9472 break;
9474 case DOT_PROD_EXPR:
9475 c1 = DOT_PROD_EXPR;
9476 c2 = DOT_PROD_EXPR;
9477 break;
9479 case SAD_EXPR:
9480 c1 = SAD_EXPR;
9481 c2 = SAD_EXPR;
9482 break;
9484 case VEC_WIDEN_MULT_EVEN_EXPR:
9485 /* Support the recursion induced just above. */
9486 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9487 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9488 break;
9490 case WIDEN_LSHIFT_EXPR:
9491 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9492 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9493 break;
9495 CASE_CONVERT:
9496 c1 = VEC_UNPACK_LO_EXPR;
9497 c2 = VEC_UNPACK_HI_EXPR;
9498 break;
9500 case FLOAT_EXPR:
9501 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9502 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9503 break;
9505 case FIX_TRUNC_EXPR:
9506 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9507 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9508 computing the operation. */
9509 return false;
9511 default:
9512 gcc_unreachable ();
9515 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9516 std::swap (c1, c2);
9518 if (code == FIX_TRUNC_EXPR)
9520 /* The signedness is determined from the output operand.  */
9521 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9522 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9524 else
9526 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9527 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9530 if (!optab1 || !optab2)
9531 return false;
9533 vec_mode = TYPE_MODE (vectype);
9534 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9535 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9536 return false;
9538 *code1 = c1;
9539 *code2 = c2;
9541 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9542 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9543 /* For scalar masks we may have different boolean
9544 vector types sharing the same QImode, so additionally
9545 check the number of elements (example below).  */
9546 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9547 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9548 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
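  /* Example of the mask subtlety above (assuming an AVX-512-style target
     with integer-mode masks): a 4-lane and an 8-lane boolean vector type
     can both have QImode, so comparing modes alone would conflate them;
     only the subparts check tells them apart.  */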
9550 /* Check if it's a multi-step conversion that can be done using intermediate
9551 types. */
9553 prev_type = vectype;
9554 prev_mode = vec_mode;
9556 if (!CONVERT_EXPR_CODE_P (code))
9557 return false;
9559 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9560 intermediate steps in the promotion sequence.  We try
9561 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9562 not.  */
9563 interm_types->create (MAX_INTERM_CVT_STEPS);
9564 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9566 intermediate_mode = insn_data[icode1].operand[0].mode;
9567 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9569 poly_uint64 intermediate_nelts
9570 = exact_div (TYPE_VECTOR_SUBPARTS (prev_type), 2);
9571 intermediate_type
9572 = build_truth_vector_type (intermediate_nelts,
9573 current_vector_size);
9574 if (intermediate_mode != TYPE_MODE (intermediate_type))
9575 return false;
9577 else
9578 intermediate_type
9579 = lang_hooks.types.type_for_mode (intermediate_mode,
9580 TYPE_UNSIGNED (prev_type));
9582 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9583 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9585 if (!optab3 || !optab4
9586 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9587 || insn_data[icode1].operand[0].mode != intermediate_mode
9588 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9589 || insn_data[icode2].operand[0].mode != intermediate_mode
9590 || ((icode1 = optab_handler (optab3, intermediate_mode))
9591 == CODE_FOR_nothing)
9592 || ((icode2 = optab_handler (optab4, intermediate_mode))
9593 == CODE_FOR_nothing))
9594 break;
9596 interm_types->quick_push (intermediate_type);
9597 (*multi_step_cvt)++;
9599 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9600 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9601 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9602 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9603 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9605 prev_type = intermediate_type;
9606 prev_mode = intermediate_mode;
9609 interm_types->release ();
9610 return false;
9614 /* Function supportable_narrowing_operation
9616 Check whether an operation represented by the code CODE is a
9617 narrowing operation that is supported by the target platform in
9618 vector form (i.e., when operating on arguments of type VECTYPE_IN
9619 and producing a result of type VECTYPE_OUT).
9621 Narrowing operations we currently support are NOP (CONVERT) and
9622 FIX_TRUNC. This function checks if these operations are supported by
9623 the target platform directly via vector tree-codes.
9625 Output:
9626 - CODE1 is the code of a vector operation to be used when
9627 vectorizing the operation, if available.
9628 - MULTI_STEP_CVT determines the number of required intermediate steps in
9629 case of multi-step conversion (like int->short->char - in that case
9630 MULTI_STEP_CVT will be 1).
9631 - INTERM_TYPES contains the intermediate type required to perform the
9632 narrowing operation (short in the above example). */
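/* Worked example (assuming a target with 128-bit V4SI/V8HI/V16QI vector
   modes): narrowing a vector of ints to a vector of chars goes
   int->short->char, so on success *CODE1 is VEC_PACK_TRUNC_EXPR,
   *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the single short vector
   type used in between.  */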
9634 bool
9635 supportable_narrowing_operation (enum tree_code code,
9636 tree vectype_out, tree vectype_in,
9637 enum tree_code *code1, int *multi_step_cvt,
9638 vec<tree> *interm_types)
9640 machine_mode vec_mode;
9641 enum insn_code icode1;
9642 optab optab1, interm_optab;
9643 tree vectype = vectype_in;
9644 tree narrow_vectype = vectype_out;
9645 enum tree_code c1;
9646 tree intermediate_type, prev_type;
9647 machine_mode intermediate_mode, prev_mode;
9648 int i;
9649 bool uns;
9651 *multi_step_cvt = 0;
9652 switch (code)
9654 CASE_CONVERT:
9655 c1 = VEC_PACK_TRUNC_EXPR;
9656 break;
9658 case FIX_TRUNC_EXPR:
9659 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9660 break;
9662 case FLOAT_EXPR:
9663 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9664 tree code and optabs used for computing the operation. */
9665 return false;
9667 default:
9668 gcc_unreachable ();
9671 if (code == FIX_TRUNC_EXPR)
9672 /* The signedness is determined from the output operand.  */
9673 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9674 else
9675 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9677 if (!optab1)
9678 return false;
9680 vec_mode = TYPE_MODE (vectype);
9681 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9682 return false;
9684 *code1 = c1;
9686 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9687 /* For scalar masks we may have different boolean
9688 vector types sharing the same QImode, so additionally
9689 check the number of elements.  */
9690 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9691 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
9692 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9694 /* Check if it's a multi-step conversion that can be done using intermediate
9695 types. */
9696 prev_mode = vec_mode;
9697 prev_type = vectype;
9698 if (code == FIX_TRUNC_EXPR)
9699 uns = TYPE_UNSIGNED (vectype_out);
9700 else
9701 uns = TYPE_UNSIGNED (vectype);
9703 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
9704 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
9705 more costly than signed; see the note after this block.  */
9706 if (code == FIX_TRUNC_EXPR && uns)
9708 enum insn_code icode2;
9710 intermediate_type
9711 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9712 interm_optab
9713 = optab_for_tree_code (c1, intermediate_type, optab_default);
9714 if (interm_optab != unknown_optab
9715 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9716 && insn_data[icode1].operand[0].mode
9717 == insn_data[icode2].operand[0].mode)
9719 uns = false;
9720 optab1 = interm_optab;
9721 icode1 = icode2;
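  /* Illustrative effect (hypothetical float -> unsigned short narrowing):
     when the signed pack variant is usable and lands in the same result
     mode, it replaces the unsigned one, and the intermediate types built
     in the loop below are then created as signed as well (UNS is
     cleared).  */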
9725 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9726 intermediate steps in the narrowing sequence.  We try
9727 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
9728 interm_types->create (MAX_INTERM_CVT_STEPS);
9729 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9731 intermediate_mode = insn_data[icode1].operand[0].mode;
9732 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9734 intermediate_type
9735 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9736 current_vector_size);
9737 if (intermediate_mode != TYPE_MODE (intermediate_type))
9738 return false;
9740 else
9741 intermediate_type
9742 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9743 interm_optab
9744 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9745 optab_default);
9746 if (!interm_optab
9747 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9748 || insn_data[icode1].operand[0].mode != intermediate_mode
9749 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9750 == CODE_FOR_nothing))
9751 break;
9753 interm_types->quick_push (intermediate_type);
9754 (*multi_step_cvt)++;
9756 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9757 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9758 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
9759 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9761 prev_mode = intermediate_mode;
9762 prev_type = intermediate_type;
9763 optab1 = interm_optab;
9766 interm_types->release ();
9767 return false;