[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "rtl.h"
30 #include "ssa.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "target.h"
35 #include "gimple-pretty-print.h"
36 #include "internal-fn.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "cfgloop.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-scalar-evolution.h"
46 #include "flags.h"
47 #include "insn-config.h"
48 #include "recog.h" /* FIXME: for insn_data */
49 #include "insn-codes.h"
50 #include "optabs-tree.h"
51 #include "diagnostic-core.h"
52 #include "tree-vectorizer.h"
53 #include "cgraph.h"
54 #include "builtins.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if (body_cost_vec)
96 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
97 add_stmt_info_to_vec (body_cost_vec, count, kind,
98 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
99 misalign);
100 return (unsigned)
101 (builtin_vectorization_cost (kind, vectype, misalign) * count);
104 else
105 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
106 count, kind, stmt_info, misalign, where);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
125 tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT. */
149 static void
150 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
151 tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
168 static tree
169 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
171 tree mem_ref, alias_ptr_type;
173 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
174 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
175 /* Arrays have the same alignment as their type. */
176 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
177 return mem_ref;
180 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
182 /* Function vect_mark_relevant.
184 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
186 static void
187 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
188 enum vect_relevant relevant, bool live_p,
189 bool used_in_pattern)
191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
192 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
194 gimple *pattern_stmt;
196 if (dump_enabled_p ())
197 dump_printf_loc (MSG_NOTE, vect_location,
198 "mark relevant %d, live %d.\n", relevant, live_p);
200 /* If this stmt is an original stmt in a pattern, we might need to mark its
201 related pattern stmt instead of the original stmt. However, such stmts
202 may have their own uses that are not in any pattern, in such cases the
203 stmt itself should be marked. */
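/* Illustrative example (hypothetical pattern, names made up): if

      S1: t_1 = x_2 * y_3;

   was recognized as part of a widening-multiplication pattern and replaced
   by a pattern stmt S1', relevance is normally recorded on S1' instead of
   S1.  Only if t_1 also has uses outside any pattern does S1 itself get
   marked.  */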
204 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
206 bool found = false;
207 if (!used_in_pattern)
209 imm_use_iterator imm_iter;
210 use_operand_p use_p;
211 gimple *use_stmt;
212 tree lhs;
213 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
214 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
216 if (is_gimple_assign (stmt))
217 lhs = gimple_assign_lhs (stmt);
218 else
219 lhs = gimple_call_lhs (stmt);
221 /* This use is out of pattern use, if LHS has other uses that are
222 pattern uses, we should mark the stmt itself, and not the pattern
223 stmt. */
224 if (lhs && TREE_CODE (lhs) == SSA_NAME)
225 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
227 if (is_gimple_debug (USE_STMT (use_p)))
228 continue;
229 use_stmt = USE_STMT (use_p);
231 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
232 continue;
234 if (vinfo_for_stmt (use_stmt)
235 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
237 found = true;
238 break;
243 if (!found)
245 /* This is the last stmt in a sequence that was detected as a
246 pattern that can potentially be vectorized. Don't mark the stmt
247 as relevant/live because it's not going to be vectorized.
248 Instead mark the pattern-stmt that replaces it. */
250 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
252 if (dump_enabled_p ())
253 dump_printf_loc (MSG_NOTE, vect_location,
254 "last stmt in pattern. don't mark"
255 " relevant/live.\n");
256 stmt_info = vinfo_for_stmt (pattern_stmt);
257 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
258 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
259 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
260 stmt = pattern_stmt;
264 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
265 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
266 STMT_VINFO_RELEVANT (stmt_info) = relevant;
268 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
269 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
271 if (dump_enabled_p ())
272 dump_printf_loc (MSG_NOTE, vect_location,
273 "already marked relevant/live.\n");
274 return;
277 worklist->safe_push (stmt);
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT in loop that is represented by LOOP_VINFO is
284 "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - it is a control stmt in the loop (other than the loop exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
293 static bool
294 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt)
308 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
309 != loop_exit_ctrl_vec_info_type)
310 *relevant = vect_used_in_scope;
312 /* changing memory. */
313 if (gimple_code (stmt) != GIMPLE_PHI)
314 if (gimple_vdef (stmt)
315 && !gimple_clobber_p (stmt))
317 if (dump_enabled_p ())
318 dump_printf_loc (MSG_NOTE, vect_location,
319 "vec_stmt_relevant_p: stmt has vdefs.\n");
320 *relevant = vect_used_in_scope;
323 /* uses outside the loop. */
324 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
326 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
328 basic_block bb = gimple_bb (USE_STMT (use_p));
329 if (!flow_bb_inside_loop_p (loop, bb))
331 if (dump_enabled_p ())
332 dump_printf_loc (MSG_NOTE, vect_location,
333 "vec_stmt_relevant_p: used out of loop.\n");
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 /* We expect all such uses to be in the loop exit phis
339 (because of loop closed form) */
340 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
341 gcc_assert (bb == single_exit (loop)->dest);
343 *live_p = true;
348 return (*live_p || *relevant);
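/* Illustrative example (hypothetical loop, assuming loop-closed SSA form):

      for (i = 0; i < n; i++)
        {
          a[i] = b[i] + 1;    <-- has a vdef, so *relevant = vect_used_in_scope
          s_1 = s_0 + b[i];   <-- s_1 is used in an exit phi after the loop,
        }                         so *live_p = true

   Stmts that are neither live nor relevant here (e.g. pure address
   computations) are only marked later, if at all, via process_use.  */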
352 /* Function exist_non_indexing_operands_for_use_p
354 USE is one of the uses attached to STMT. Check if USE is
355 used in STMT for anything other than indexing an array. */
357 static bool
358 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
360 tree operand;
361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
363 /* USE corresponds to some operand in STMT. If there is no data
364 reference in STMT, then any operand that corresponds to USE
365 is not indexing an array. */
366 if (!STMT_VINFO_DATA_REF (stmt_info))
367 return true;
369 /* STMT has a data_ref. FORNOW this means that it's of one of
370 the following forms:
371 -1- ARRAY_REF = var
372 -2- var = ARRAY_REF
373 (This should have been verified in analyze_data_refs).
375 'var' in the second case corresponds to a def, not a use,
376 so USE cannot correspond to any operands that are not used
377 for array indexing.
379 Therefore, all we need to check is if STMT falls into the
380 first case, and whether var corresponds to USE. */
382 if (!gimple_assign_copy_p (stmt))
384 if (is_gimple_call (stmt)
385 && gimple_call_internal_p (stmt))
386 switch (gimple_call_internal_fn (stmt))
388 case IFN_MASK_STORE:
389 operand = gimple_call_arg (stmt, 3);
390 if (operand == use)
391 return true;
392 /* FALLTHRU */
393 case IFN_MASK_LOAD:
394 operand = gimple_call_arg (stmt, 2);
395 if (operand == use)
396 return true;
397 break;
398 default:
399 break;
401 return false;
404 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
405 return false;
406 operand = gimple_assign_rhs1 (stmt);
407 if (TREE_CODE (operand) != SSA_NAME)
408 return false;
410 if (operand == use)
411 return true;
413 return false;
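/* Illustrative example (hypothetical stmt): for the store

      a[i_2] = x_1;

   exist_non_indexing_operands_for_use_p returns true for USE == x_1 (the
   stored value) and false for USE == i_2, which only feeds the array index.
   For a load  y_3 = a[i_2];  the LHS is an SSA name, so the function
   returns false for every use.  */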
418 /* Function process_use.
420 Inputs:
421 - a USE in STMT in a loop represented by LOOP_VINFO
422 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
423 that defined USE. This is done by calling mark_relevant and passing it
424 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
425 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
426 be performed.
428 Outputs:
429 Generally, LIVE_P and RELEVANT are used to define the liveness and
430 relevance info of the DEF_STMT of this USE:
431 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
432 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
433 Exceptions:
434 - case 1: If USE is used only for address computations (e.g. array indexing),
435 which does not need to be directly vectorized, then the liveness/relevance
436 of the respective DEF_STMT is left unchanged.
437 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
438 skip DEF_STMT because it has already been processed.
439 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
440 be modified accordingly.
442 Return true if everything is as expected. Return false otherwise. */
444 static bool
445 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
446 enum vect_relevant relevant, vec<gimple *> *worklist,
447 bool force)
449 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
450 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
451 stmt_vec_info dstmt_vinfo;
452 basic_block bb, def_bb;
453 tree def;
454 gimple *def_stmt;
455 enum vect_def_type dt;
457 /* case 1: we are only interested in uses that need to be vectorized. Uses
458 that are used for address computation are not considered relevant. */
459 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
460 return true;
462 if (!vect_is_simple_use (use, stmt, loop_vinfo, &def_stmt, &def, &dt))
464 if (dump_enabled_p ())
465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
466 "not vectorized: unsupported use in stmt.\n");
467 return false;
470 if (!def_stmt || gimple_nop_p (def_stmt))
471 return true;
473 def_bb = gimple_bb (def_stmt);
474 if (!flow_bb_inside_loop_p (loop, def_bb))
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
478 return true;
481 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
482 DEF_STMT must have already been processed, because this should be the
483 only way that STMT, which is a reduction-phi, was put in the worklist,
484 as there should be no other uses for DEF_STMT in the loop. So we just
485 check that everything is as expected, and we are done. */
486 dstmt_vinfo = vinfo_for_stmt (def_stmt);
487 bb = gimple_bb (stmt);
488 if (gimple_code (stmt) == GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
490 && gimple_code (def_stmt) != GIMPLE_PHI
491 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
492 && bb->loop_father == def_bb->loop_father)
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_NOTE, vect_location,
496 "reduc-stmt defining reduc-phi in the same nest.\n");
497 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
498 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
499 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
500 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
501 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
502 return true;
505 /* case 3a: outer-loop stmt defining an inner-loop stmt:
506 outer-loop-header-bb:
507 d = def_stmt
508 inner-loop:
509 stmt # use (d)
510 outer-loop-tail-bb:
511 ... */
512 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
514 if (dump_enabled_p ())
515 dump_printf_loc (MSG_NOTE, vect_location,
516 "outer-loop def-stmt defining inner-loop stmt.\n");
518 switch (relevant)
520 case vect_unused_in_scope:
521 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
522 vect_used_in_scope : vect_unused_in_scope;
523 break;
525 case vect_used_in_outer_by_reduction:
526 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
527 relevant = vect_used_by_reduction;
528 break;
530 case vect_used_in_outer:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_in_scope;
533 break;
535 case vect_used_in_scope:
536 break;
538 default:
539 gcc_unreachable ();
543 /* case 3b: inner-loop stmt defining an outer-loop stmt:
544 outer-loop-header-bb:
546 inner-loop:
547 d = def_stmt
548 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
549 stmt # use (d) */
550 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
552 if (dump_enabled_p ())
553 dump_printf_loc (MSG_NOTE, vect_location,
554 "inner-loop def-stmt defining outer-loop stmt.\n");
556 switch (relevant)
558 case vect_unused_in_scope:
559 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
560 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
561 vect_used_in_outer_by_reduction : vect_unused_in_scope;
562 break;
564 case vect_used_by_reduction:
565 relevant = vect_used_in_outer_by_reduction;
566 break;
568 case vect_used_in_scope:
569 relevant = vect_used_in_outer;
570 break;
572 default:
573 gcc_unreachable ();
577 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
578 is_pattern_stmt_p (stmt_vinfo));
579 return true;
583 /* Function vect_mark_stmts_to_be_vectorized.
585 Not all stmts in the loop need to be vectorized. For example:
587 for i...
588 for j...
589 1. T0 = i + j
590 2. T1 = a[T0]
592 3. j = j + 1
594 Stmts 1 and 3 do not need to be vectorized, because loop control and
595 addressing of vectorized data-refs are handled differently.
597 This pass detects such stmts. */
599 bool
600 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
602 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
603 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
604 unsigned int nbbs = loop->num_nodes;
605 gimple_stmt_iterator si;
606 gimple *stmt;
607 unsigned int i;
608 stmt_vec_info stmt_vinfo;
609 basic_block bb;
610 gimple *phi;
611 bool live_p;
612 enum vect_relevant relevant, tmp_relevant;
613 enum vect_def_type def_type;
615 if (dump_enabled_p ())
616 dump_printf_loc (MSG_NOTE, vect_location,
617 "=== vect_mark_stmts_to_be_vectorized ===\n");
619 auto_vec<gimple *, 64> worklist;
621 /* 1. Init worklist. */
622 for (i = 0; i < nbbs; i++)
624 bb = bbs[i];
625 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
627 phi = gsi_stmt (si);
628 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
631 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
634 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 stmt = gsi_stmt (si);
640 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
643 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
646 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt = worklist.pop ();
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
664 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
665 (DEF_STMT) as relevant/irrelevant and live/dead according to the
666 liveness and relevance properties of STMT. */
667 stmt_vinfo = vinfo_for_stmt (stmt);
668 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
669 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
671 /* Generally, the liveness and relevance properties of STMT are
672 propagated as is to the DEF_STMTs of its USEs:
673 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
674 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
676 One exception is when STMT has been identified as defining a reduction
677 variable; in this case we set the liveness/relevance as follows:
678 live_p = false
679 relevant = vect_used_by_reduction
680 This is because we distinguish between two kinds of relevant stmts -
681 those that are used by a reduction computation, and those that are
682 (also) used by a regular computation. This allows us later on to
683 identify stmts that are used solely by a reduction, and therefore the
684 order of the results that they produce does not have to be kept. */
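/* E.g. (hypothetical reduction) in  s += a[i] * b[i];  the multiplication
   feeds only the reduction, so it is marked vect_used_by_reduction and the
   order of its partial results need not be preserved.  */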
686 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
687 tmp_relevant = relevant;
688 switch (def_type)
690 case vect_reduction_def:
691 switch (tmp_relevant)
693 case vect_unused_in_scope:
694 relevant = vect_used_by_reduction;
695 break;
697 case vect_used_by_reduction:
698 if (gimple_code (stmt) == GIMPLE_PHI)
699 break;
700 /* fall through */
702 default:
703 if (dump_enabled_p ())
704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
705 "unsupported use of reduction.\n");
706 return false;
709 live_p = false;
710 break;
712 case vect_nested_cycle:
713 if (tmp_relevant != vect_unused_in_scope
714 && tmp_relevant != vect_used_in_outer_by_reduction
715 && tmp_relevant != vect_used_in_outer)
717 if (dump_enabled_p ())
718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
719 "unsupported use of nested cycle.\n");
721 return false;
724 live_p = false;
725 break;
727 case vect_double_reduction_def:
728 if (tmp_relevant != vect_unused_in_scope
729 && tmp_relevant != vect_used_by_reduction)
731 if (dump_enabled_p ())
732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
733 "unsupported use of double reduction.\n");
735 return false;
738 live_p = false;
739 break;
741 default:
742 break;
745 if (is_pattern_stmt_p (stmt_vinfo))
747 /* Pattern statements are not inserted into the code, so
748 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
749 have to scan the RHS or function arguments instead. */
750 if (is_gimple_assign (stmt))
752 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
753 tree op = gimple_assign_rhs1 (stmt);
755 i = 1;
756 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
758 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
759 live_p, relevant, &worklist, false)
760 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
761 live_p, relevant, &worklist, false))
762 return false;
763 i = 2;
765 for (; i < gimple_num_ops (stmt); i++)
767 op = gimple_op (stmt, i);
768 if (TREE_CODE (op) == SSA_NAME
769 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
770 &worklist, false))
771 return false;
774 else if (is_gimple_call (stmt))
776 for (i = 0; i < gimple_call_num_args (stmt); i++)
778 tree arg = gimple_call_arg (stmt, i);
779 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
780 &worklist, false))
781 return false;
785 else
786 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
788 tree op = USE_FROM_PTR (use_p);
789 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
790 &worklist, false))
791 return false;
794 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
796 tree off;
797 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
798 gcc_assert (decl);
799 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
800 &worklist, true))
801 return false;
803 } /* while worklist */
805 return true;
809 /* Function vect_model_simple_cost.
811 Models cost for simple operations, i.e. those that only emit ncopies of a
812 single op. Right now, this does not account for multiple insns that could
813 be generated for the single vector op. We will handle that shortly. */
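/* For example (hypothetical counts): with ncopies = 2 and one operand that
   is external to the loop, the code below records one vector_stmt in the
   prologue (to build the invariant vector) and two vector_stmt's in the
   loop body.  */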
815 void
816 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
817 enum vect_def_type *dt,
818 stmt_vector_for_cost *prologue_cost_vec,
819 stmt_vector_for_cost *body_cost_vec)
821 int i;
822 int inside_cost = 0, prologue_cost = 0;
824 /* The SLP costs were already calculated during SLP tree build. */
825 if (PURE_SLP_STMT (stmt_info))
826 return;
828 /* FORNOW: Assuming maximum 2 args per stmts. */
829 for (i = 0; i < 2; i++)
830 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
831 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
832 stmt_info, 0, vect_prologue);
834 /* Pass the inside-of-loop statements to the target-specific cost model. */
835 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
836 stmt_info, 0, vect_body);
838 if (dump_enabled_p ())
839 dump_printf_loc (MSG_NOTE, vect_location,
840 "vect_model_simple_cost: inside_cost = %d, "
841 "prologue_cost = %d .\n", inside_cost, prologue_cost);
845 /* Model cost for type demotion and promotion operations. PWR is normally
846 zero for single-step promotions and demotions. It will be one if
847 two-step promotion/demotion is required, and so on. Each additional
848 step doubles the number of instructions required. */
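/* Worked example (assuming vect_pow2 (N) returns 2**N): a two-step
   promotion has PWR == 1, so the loop below charges
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote stmts,
   while the corresponding two-step demotion charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3.  */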
850 static void
851 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
852 enum vect_def_type *dt, int pwr)
854 int i, tmp;
855 int inside_cost = 0, prologue_cost = 0;
856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
857 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
858 void *target_cost_data;
860 /* The SLP costs were already calculated during SLP tree build. */
861 if (PURE_SLP_STMT (stmt_info))
862 return;
864 if (loop_vinfo)
865 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
866 else
867 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
869 for (i = 0; i < pwr + 1; i++)
871 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
872 (i + 1) : i;
873 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
874 vec_promote_demote, stmt_info, 0,
875 vect_body);
878 /* FORNOW: Assuming maximum 2 args per stmts. */
879 for (i = 0; i < 2; i++)
880 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
881 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
882 stmt_info, 0, vect_prologue);
884 if (dump_enabled_p ())
885 dump_printf_loc (MSG_NOTE, vect_location,
886 "vect_model_promotion_demotion_cost: inside_cost = %d, "
887 "prologue_cost = %d .\n", inside_cost, prologue_cost);
890 /* Function vect_cost_group_size
892 For grouped load or store, return the group_size only if it is the first
893 load or store of a group, else return 1. This ensures that group size is
894 only returned once per group. */
896 static int
897 vect_cost_group_size (stmt_vec_info stmt_info)
899 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
901 if (first_stmt == STMT_VINFO_STMT (stmt_info))
902 return GROUP_SIZE (stmt_info);
904 return 1;
908 /* Function vect_model_store_cost
910 Models cost for stores. In the case of grouped accesses, one access
911 has the overhead of the grouped access attributed to it. */
913 void
914 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
915 bool store_lanes_p, enum vect_def_type dt,
916 slp_tree slp_node,
917 stmt_vector_for_cost *prologue_cost_vec,
918 stmt_vector_for_cost *body_cost_vec)
920 int group_size;
921 unsigned int inside_cost = 0, prologue_cost = 0;
922 struct data_reference *first_dr;
923 gimple *first_stmt;
925 if (dt == vect_constant_def || dt == vect_external_def)
926 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
927 stmt_info, 0, vect_prologue);
929 /* Grouped access? */
930 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
932 if (slp_node)
934 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
935 group_size = 1;
937 else
939 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
940 group_size = vect_cost_group_size (stmt_info);
943 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
945 /* Not a grouped access. */
946 else
948 group_size = 1;
949 first_dr = STMT_VINFO_DATA_REF (stmt_info);
952 /* We assume that the cost of a single store-lanes instruction is
953 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
954 access is instead being provided by a permute-and-store operation,
955 include the cost of the permutes. */
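/* For example (hypothetical sizes): with ncopies == 1 and group_size == 4
   the permute-and-store scheme below charges
   1 * ceil_log2 (4) * 4 = 8 vec_perm stmts on top of the stores.  */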
956 if (!store_lanes_p && group_size > 1
957 && !STMT_VINFO_STRIDED_P (stmt_info))
959 /* Uses high and low interleave or shuffle operations for each
960 needed permute. */
961 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
962 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
963 stmt_info, 0, vect_body);
965 if (dump_enabled_p ())
966 dump_printf_loc (MSG_NOTE, vect_location,
967 "vect_model_store_cost: strided group_size = %d .\n",
968 group_size);
971 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
972 /* Costs of the stores. */
973 if (STMT_VINFO_STRIDED_P (stmt_info)
974 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
976 /* N scalar stores plus extracting the elements. */
977 inside_cost += record_stmt_cost (body_cost_vec,
978 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
979 scalar_store, stmt_info, 0, vect_body);
981 else
982 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
984 if (STMT_VINFO_STRIDED_P (stmt_info))
985 inside_cost += record_stmt_cost (body_cost_vec,
986 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
987 vec_to_scalar, stmt_info, 0, vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: inside_cost = %d, "
992 "prologue_cost = %d .\n", inside_cost, prologue_cost);
996 /* Calculate cost of DR's memory access. */
997 void
998 vect_get_store_cost (struct data_reference *dr, int ncopies,
999 unsigned int *inside_cost,
1000 stmt_vector_for_cost *body_cost_vec)
1002 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1003 gimple *stmt = DR_STMT (dr);
1004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1006 switch (alignment_support_scheme)
1008 case dr_aligned:
1010 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1011 vector_store, stmt_info, 0,
1012 vect_body);
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "vect_model_store_cost: aligned.\n");
1017 break;
1020 case dr_unaligned_supported:
1022 /* Here, we assign an additional cost for the unaligned store. */
1023 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1024 unaligned_store, stmt_info,
1025 DR_MISALIGNMENT (dr), vect_body);
1026 if (dump_enabled_p ())
1027 dump_printf_loc (MSG_NOTE, vect_location,
1028 "vect_model_store_cost: unaligned supported by "
1029 "hardware.\n");
1030 break;
1033 case dr_unaligned_unsupported:
1035 *inside_cost = VECT_MAX_COST;
1037 if (dump_enabled_p ())
1038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1039 "vect_model_store_cost: unsupported access.\n");
1040 break;
1043 default:
1044 gcc_unreachable ();
1049 /* Function vect_model_load_cost
1051 Models cost for loads. In the case of grouped accesses, the last access
1052 has the overhead of the grouped access attributed to it. Since unaligned
1053 accesses are supported for loads, we also account for the costs of the
1054 access scheme chosen. */
1056 void
1057 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1058 bool load_lanes_p, slp_tree slp_node,
1059 stmt_vector_for_cost *prologue_cost_vec,
1060 stmt_vector_for_cost *body_cost_vec)
1062 int group_size;
1063 gimple *first_stmt;
1064 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1065 unsigned int inside_cost = 0, prologue_cost = 0;
1067 /* Grouped accesses? */
1068 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1069 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1071 group_size = vect_cost_group_size (stmt_info);
1072 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1074 /* Not a grouped access. */
1075 else
1077 group_size = 1;
1078 first_dr = dr;
1081 /* We assume that the cost of a single load-lanes instruction is
1082 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1083 access is instead being provided by a load-and-permute operation,
1084 include the cost of the permutes. */
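/* For example (hypothetical sizes): with ncopies == 1 and group_size == 8
   the load-and-permute scheme charges 1 * ceil_log2 (8) * 8 = 24 vec_perm
   stmts on top of the loads.  */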
1085 if (!load_lanes_p && group_size > 1
1086 && !STMT_VINFO_STRIDED_P (stmt_info))
1088 /* Uses even and odd extract operations or shuffle operations
1089 for each needed permute. */
1090 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1091 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1092 stmt_info, 0, vect_body);
1094 if (dump_enabled_p ())
1095 dump_printf_loc (MSG_NOTE, vect_location,
1096 "vect_model_load_cost: strided group_size = %d .\n",
1097 group_size);
1100 /* The loads themselves. */
1101 if (STMT_VINFO_STRIDED_P (stmt_info)
1102 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1104 /* N scalar loads plus gathering them into a vector. */
1105 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1106 inside_cost += record_stmt_cost (body_cost_vec,
1107 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1108 scalar_load, stmt_info, 0, vect_body);
1110 else
1111 vect_get_load_cost (first_dr, ncopies,
1112 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1113 || group_size > 1 || slp_node),
1114 &inside_cost, &prologue_cost,
1115 prologue_cost_vec, body_cost_vec, true);
1116 if (STMT_VINFO_STRIDED_P (stmt_info))
1117 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1118 stmt_info, 0, vect_body);
1120 if (dump_enabled_p ())
1121 dump_printf_loc (MSG_NOTE, vect_location,
1122 "vect_model_load_cost: inside_cost = %d, "
1123 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1127 /* Calculate cost of DR's memory access. */
1128 void
1129 vect_get_load_cost (struct data_reference *dr, int ncopies,
1130 bool add_realign_cost, unsigned int *inside_cost,
1131 unsigned int *prologue_cost,
1132 stmt_vector_for_cost *prologue_cost_vec,
1133 stmt_vector_for_cost *body_cost_vec,
1134 bool record_prologue_costs)
1136 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1137 gimple *stmt = DR_STMT (dr);
1138 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140 switch (alignment_support_scheme)
1142 case dr_aligned:
1144 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1145 stmt_info, 0, vect_body);
1147 if (dump_enabled_p ())
1148 dump_printf_loc (MSG_NOTE, vect_location,
1149 "vect_model_load_cost: aligned.\n");
1151 break;
1153 case dr_unaligned_supported:
1155 /* Here, we assign an additional cost for the unaligned load. */
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 unaligned_load, stmt_info,
1158 DR_MISALIGNMENT (dr), vect_body);
1160 if (dump_enabled_p ())
1161 dump_printf_loc (MSG_NOTE, vect_location,
1162 "vect_model_load_cost: unaligned supported by "
1163 "hardware.\n");
1165 break;
1167 case dr_explicit_realign:
1169 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1170 vector_load, stmt_info, 0, vect_body);
1171 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1172 vec_perm, stmt_info, 0, vect_body);
1174 /* FIXME: If the misalignment remains fixed across the iterations of
1175 the containing loop, the following cost should be added to the
1176 prologue costs. */
1177 if (targetm.vectorize.builtin_mask_for_load)
1178 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1179 stmt_info, 0, vect_body);
1181 if (dump_enabled_p ())
1182 dump_printf_loc (MSG_NOTE, vect_location,
1183 "vect_model_load_cost: explicit realign\n");
1185 break;
1187 case dr_explicit_realign_optimized:
1189 if (dump_enabled_p ())
1190 dump_printf_loc (MSG_NOTE, vect_location,
1191 "vect_model_load_cost: unaligned software "
1192 "pipelined.\n");
1194 /* Unaligned software pipeline has a load of an address, an initial
1195 load, and possibly a mask operation to "prime" the loop. However,
1196 if this is an access in a group of loads, which provide grouped
1197 access, then the above cost should only be considered for one
1198 access in the group. Inside the loop, there is a load op
1199 and a realignment op. */
1201 if (add_realign_cost && record_prologue_costs)
1203 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1204 vector_stmt, stmt_info,
1205 0, vect_prologue);
1206 if (targetm.vectorize.builtin_mask_for_load)
1207 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1208 vector_stmt, stmt_info,
1209 0, vect_prologue);
1212 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1213 stmt_info, 0, vect_body);
1214 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1215 stmt_info, 0, vect_body);
1217 if (dump_enabled_p ())
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "vect_model_load_cost: explicit realign optimized"
1220 "\n");
1222 break;
1225 case dr_unaligned_unsupported:
1227 *inside_cost = VECT_MAX_COST;
1229 if (dump_enabled_p ())
1230 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1231 "vect_model_load_cost: unsupported access.\n");
1232 break;
1235 default:
1236 gcc_unreachable ();
1240 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1241 the loop preheader for the vectorized stmt STMT. */
1243 static void
1244 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1246 if (gsi)
1247 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1248 else
1250 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1253 if (loop_vinfo)
1255 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1256 basic_block new_bb;
1257 edge pe;
1259 if (nested_in_vect_loop_p (loop, stmt))
1260 loop = loop->inner;
1262 pe = loop_preheader_edge (loop);
1263 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1264 gcc_assert (!new_bb);
1266 else
1268 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1269 basic_block bb;
1270 gimple_stmt_iterator gsi_bb_start;
1272 gcc_assert (bb_vinfo);
1273 bb = BB_VINFO_BB (bb_vinfo);
1274 gsi_bb_start = gsi_after_labels (bb);
1275 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1279 if (dump_enabled_p ())
1281 dump_printf_loc (MSG_NOTE, vect_location,
1282 "created new init_stmt: ");
1283 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1287 /* Function vect_init_vector.
1289 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1290 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1291 vector type a vector with all elements equal to VAL is created first.
1292 Place the initialization at BSI if it is not NULL. Otherwise, place the
1293 initialization at the loop preheader.
1294 Return the DEF of INIT_STMT.
1295 It will be used in the vectorization of STMT. */
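/* For example (hypothetical operands): called with VAL == 5 and TYPE ==
   vector(4) int, this emits roughly

      cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (or at GSI, if given) and returns the SSA name
   cst_1.  */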
1297 tree
1298 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1300 tree new_var;
1301 gimple *init_stmt;
1302 tree vec_oprnd;
1303 tree new_temp;
1305 if (TREE_CODE (type) == VECTOR_TYPE
1306 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1308 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1310 if (CONSTANT_CLASS_P (val))
1311 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1312 else
1314 new_temp = make_ssa_name (TREE_TYPE (type));
1315 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1316 vect_init_vector_1 (stmt, init_stmt, gsi);
1317 val = new_temp;
1320 val = build_vector_from_val (type, val);
1323 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1324 init_stmt = gimple_build_assign (new_var, val);
1325 new_temp = make_ssa_name (new_var, init_stmt);
1326 gimple_assign_set_lhs (init_stmt, new_temp);
1327 vect_init_vector_1 (stmt, init_stmt, gsi);
1328 vec_oprnd = gimple_assign_lhs (init_stmt);
1329 return vec_oprnd;
1333 /* Function vect_get_vec_def_for_operand.
1335 OP is an operand in STMT. This function returns a (vector) def that will be
1336 used in the vectorized stmt for STMT.
1338 In the case that OP is an SSA_NAME which is defined in the loop, then
1339 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1341 In case OP is an invariant or constant, a new stmt that creates a vector def
1342 needs to be introduced. */
1344 tree
1345 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree *scalar_def)
1347 tree vec_oprnd;
1348 gimple *vec_stmt;
1349 gimple *def_stmt;
1350 stmt_vec_info def_stmt_info = NULL;
1351 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1352 unsigned int nunits;
1353 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1354 tree def;
1355 enum vect_def_type dt;
1356 bool is_simple_use;
1357 tree vector_type;
1359 if (dump_enabled_p ())
1361 dump_printf_loc (MSG_NOTE, vect_location,
1362 "vect_get_vec_def_for_operand: ");
1363 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1364 dump_printf (MSG_NOTE, "\n");
1367 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo,
1368 &def_stmt, &def, &dt);
1369 gcc_assert (is_simple_use);
1370 if (dump_enabled_p ())
1372 int loc_printed = 0;
1373 if (def)
1375 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1376 loc_printed = 1;
1377 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1378 dump_printf (MSG_NOTE, "\n");
1380 if (def_stmt)
1382 if (loc_printed)
1383 dump_printf (MSG_NOTE, " def_stmt = ");
1384 else
1385 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1386 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1390 switch (dt)
1392 /* Case 1: operand is a constant. */
1393 case vect_constant_def:
1395 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1396 gcc_assert (vector_type);
1397 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1399 if (scalar_def)
1400 *scalar_def = op;
1402 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1403 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE, vect_location,
1405 "Create vector_cst. nunits = %d\n", nunits);
1407 return vect_init_vector (stmt, op, vector_type, NULL);
1410 /* Case 2: operand is defined outside the loop - loop invariant. */
1411 case vect_external_def:
1413 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1414 gcc_assert (vector_type);
1416 if (scalar_def)
1417 *scalar_def = def;
1419 /* Create 'vec_inv = {inv,inv,..,inv}' */
1420 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1423 return vect_init_vector (stmt, def, vector_type, NULL);
1426 /* Case 3: operand is defined inside the loop. */
1427 case vect_internal_def:
1429 if (scalar_def)
1430 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1432 /* Get the def from the vectorized stmt. */
1433 def_stmt_info = vinfo_for_stmt (def_stmt);
1435 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1436 /* Get vectorized pattern statement. */
1437 if (!vec_stmt
1438 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1439 && !STMT_VINFO_RELEVANT (def_stmt_info))
1440 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1441 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1442 gcc_assert (vec_stmt);
1443 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1444 vec_oprnd = PHI_RESULT (vec_stmt);
1445 else if (is_gimple_call (vec_stmt))
1446 vec_oprnd = gimple_call_lhs (vec_stmt);
1447 else
1448 vec_oprnd = gimple_assign_lhs (vec_stmt);
1449 return vec_oprnd;
1452 /* Case 4: operand is defined by a loop header phi - reduction */
1453 case vect_reduction_def:
1454 case vect_double_reduction_def:
1455 case vect_nested_cycle:
1457 struct loop *loop;
1459 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1460 loop = (gimple_bb (def_stmt))->loop_father;
1462 /* Get the def before the loop */
1463 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1464 return get_initial_def_for_reduction (stmt, op, scalar_def);
1467 /* Case 5: operand is defined by loop-header phi - induction. */
1468 case vect_induction_def:
1470 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1472 /* Get the def from the vectorized stmt. */
1473 def_stmt_info = vinfo_for_stmt (def_stmt);
1474 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1475 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1476 vec_oprnd = PHI_RESULT (vec_stmt);
1477 else
1478 vec_oprnd = gimple_get_lhs (vec_stmt);
1479 return vec_oprnd;
1482 default:
1483 gcc_unreachable ();
1488 /* Function vect_get_vec_def_for_stmt_copy
1490 Return a vector-def for an operand. This function is used when the
1491 vectorized stmt to be created (by the caller to this function) is a "copy"
1492 created in case the vectorized result cannot fit in one vector, and several
1493 copies of the vector-stmt are required. In this case the vector-def is
1494 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1495 of the stmt that defines VEC_OPRND.
1496 DT is the type of the vector def VEC_OPRND.
1498 Context:
1499 In case the vectorization factor (VF) is bigger than the number
1500 of elements that can fit in a vectype (nunits), we have to generate
1501 more than one vector stmt to vectorize the scalar stmt. This situation
1502 arises when there are multiple data-types operated upon in the loop; the
1503 smallest data-type determines the VF, and as a result, when vectorizing
1504 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1505 vector stmt (each computing a vector of 'nunits' results, and together
1506 computing 'VF' results in each iteration). This function is called when
1507 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1508 which VF=16 and nunits=4, so the number of copies required is 4):
1510 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1512 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1513 VS1.1: vx.1 = memref1 VS1.2
1514 VS1.2: vx.2 = memref2 VS1.3
1515 VS1.3: vx.3 = memref3
1517 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1518 VSnew.1: vz1 = vx.1 + ... VSnew.2
1519 VSnew.2: vz2 = vx.2 + ... VSnew.3
1520 VSnew.3: vz3 = vx.3 + ...
1522 The vectorization of S1 is explained in vectorizable_load.
1523 The vectorization of S2:
1524 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1525 the function 'vect_get_vec_def_for_operand' is called to
1526 get the relevant vector-def for each operand of S2. For operand x it
1527 returns the vector-def 'vx.0'.
1529 To create the remaining copies of the vector-stmt (VSnew.j), this
1530 function is called to get the relevant vector-def for each operand. It is
1531 obtained from the respective VS1.j stmt, which is recorded in the
1532 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1534 For example, to obtain the vector-def 'vx.1' in order to create the
1535 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1536 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1537 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1538 and return its def ('vx.1').
1539 Overall, to create the above sequence this function will be called 3 times:
1540 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1541 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1542 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1544 tree
1545 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1547 gimple *vec_stmt_for_operand;
1548 stmt_vec_info def_stmt_info;
1550 /* Do nothing; can reuse same def. */
1551 if (dt == vect_external_def || dt == vect_constant_def )
1552 return vec_oprnd;
1554 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1555 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1556 gcc_assert (def_stmt_info);
1557 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1558 gcc_assert (vec_stmt_for_operand);
1559 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1560 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1561 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1562 else
1563 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1564 return vec_oprnd;
1568 /* Get vectorized definitions for the operands to create a copy of an original
1569 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1571 static void
1572 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1573 vec<tree> *vec_oprnds0,
1574 vec<tree> *vec_oprnds1)
1576 tree vec_oprnd = vec_oprnds0->pop ();
1578 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1579 vec_oprnds0->quick_push (vec_oprnd);
1581 if (vec_oprnds1 && vec_oprnds1->length ())
1583 vec_oprnd = vec_oprnds1->pop ();
1584 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1585 vec_oprnds1->quick_push (vec_oprnd);
1590 /* Get vectorized definitions for OP0 and OP1.
1591 REDUC_INDEX is the index of reduction operand in case of reduction,
1592 and -1 otherwise. */
1594 void
1595 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1596 vec<tree> *vec_oprnds0,
1597 vec<tree> *vec_oprnds1,
1598 slp_tree slp_node, int reduc_index)
1600 if (slp_node)
1602 int nops = (op1 == NULL_TREE) ? 1 : 2;
1603 auto_vec<tree> ops (nops);
1604 auto_vec<vec<tree> > vec_defs (nops);
1606 ops.quick_push (op0);
1607 if (op1)
1608 ops.quick_push (op1);
1610 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1612 *vec_oprnds0 = vec_defs[0];
1613 if (op1)
1614 *vec_oprnds1 = vec_defs[1];
1616 else
1618 tree vec_oprnd;
1620 vec_oprnds0->create (1);
1621 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1622 vec_oprnds0->quick_push (vec_oprnd);
1624 if (op1)
1626 vec_oprnds1->create (1);
1627 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1628 vec_oprnds1->quick_push (vec_oprnd);
1634 /* Function vect_finish_stmt_generation.
1636 Insert a new stmt. */
1638 void
1639 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1640 gimple_stmt_iterator *gsi)
1642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1643 vec_info *vinfo = stmt_info->vinfo;
1645 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1647 if (!gsi_end_p (*gsi)
1648 && gimple_has_mem_ops (vec_stmt))
1650 gimple *at_stmt = gsi_stmt (*gsi);
1651 tree vuse = gimple_vuse (at_stmt);
1652 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1654 tree vdef = gimple_vdef (at_stmt);
1655 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1656 /* If we have an SSA vuse and insert a store, update virtual
1657 SSA form to avoid triggering the renamer. Do so only
1658 if we can easily see all uses - which is what almost always
1659 happens with the way vectorized stmts are inserted. */
1660 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1661 && ((is_gimple_assign (vec_stmt)
1662 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1663 || (is_gimple_call (vec_stmt)
1664 && !(gimple_call_flags (vec_stmt)
1665 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1667 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1668 gimple_set_vdef (vec_stmt, new_vdef);
1669 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1673 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1675 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1677 if (dump_enabled_p ())
1679 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1680 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1683 gimple_set_location (vec_stmt, gimple_location (stmt));
1685 /* While EH edges will generally prevent vectorization, stmt might
1686 e.g. be in a must-not-throw region. Ensure newly created stmts
1687 that could throw are part of the same region. */
1688 int lp_nr = lookup_stmt_eh_lp (stmt);
1689 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1690 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1693 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1694 a function declaration if the target has a vectorized version
1695 of the function, or NULL_TREE if the function cannot be vectorized. */
1697 tree
1698 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1700 tree fndecl = gimple_call_fndecl (call);
1702 /* We only handle functions that do not read or clobber memory -- i.e.
1703 const or novops ones. */
1704 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1705 return NULL_TREE;
1707 if (!fndecl
1708 || TREE_CODE (fndecl) != FUNCTION_DECL
1709 || !DECL_BUILT_IN (fndecl))
1710 return NULL_TREE;
1712 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1713 vectype_in);
1717 static tree permute_vec_elements (tree, tree, tree, gimple *,
1718 gimple_stmt_iterator *);
1721 /* Function vectorizable_mask_load_store.
1723 Check if STMT performs a conditional load or store that can be vectorized.
1724 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1725 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1726 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
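/* Such calls are produced by if-conversion; e.g. (hypothetical source)

      for (i = 0; i < n; i++)
        if (c[i])
          a[i] = b[i];

   becomes roughly

      _m = c[i] != 0;
      _v = MASK_LOAD (&b[i], align, _m);
      MASK_STORE (&a[i], align, _m, _v);

   and is vectorized here into masked vector loads/stores (or a masked
   gather when the load is a gather).  */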
1728 static bool
1729 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1730 gimple **vec_stmt, slp_tree slp_node)
1732 tree vec_dest = NULL;
1733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1734 stmt_vec_info prev_stmt_info;
1735 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1736 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1737 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1738 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1739 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1740 tree elem_type;
1741 gimple *new_stmt;
1742 tree dummy;
1743 tree dataref_ptr = NULL_TREE;
1744 gimple *ptr_incr;
1745 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1746 int ncopies;
1747 int i, j;
1748 bool inv_p;
1749 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1750 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1751 int gather_scale = 1;
1752 enum vect_def_type gather_dt = vect_unknown_def_type;
1753 bool is_store;
1754 tree mask;
1755 gimple *def_stmt;
1756 tree def;
1757 enum vect_def_type dt;
1759 if (slp_node != NULL)
1760 return false;
1762 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1763 gcc_assert (ncopies >= 1);
1765 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1766 mask = gimple_call_arg (stmt, 2);
1767 if (TYPE_PRECISION (TREE_TYPE (mask))
1768 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1769 return false;
1771 /* FORNOW. This restriction should be relaxed. */
1772 if (nested_in_vect_loop && ncopies > 1)
1774 if (dump_enabled_p ())
1775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1776 "multiple types in nested loop.");
1777 return false;
1780 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1781 return false;
1783 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1784 return false;
1786 if (!STMT_VINFO_DATA_REF (stmt_info))
1787 return false;
1789 elem_type = TREE_TYPE (vectype);
1791 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1792 return false;
1794 if (STMT_VINFO_STRIDED_P (stmt_info))
1795 return false;
1797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1799 gimple *def_stmt;
1800 tree def;
1801 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1802 &gather_off, &gather_scale);
1803 gcc_assert (gather_decl);
1804 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo,
1805 &def_stmt, &def, &gather_dt,
1806 &gather_off_vectype))
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "gather index use not simple.");
1811 return false;
1814 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1815 tree masktype
1816 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1817 if (TREE_CODE (masktype) == INTEGER_TYPE)
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1821 "masked gather with integer mask not supported.");
1822 return false;
1825 else if (tree_int_cst_compare (nested_in_vect_loop
1826 ? STMT_VINFO_DR_STEP (stmt_info)
1827 : DR_STEP (dr), size_zero_node) <= 0)
1828 return false;
1829 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1830 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1831 return false;
1833 if (TREE_CODE (mask) != SSA_NAME)
1834 return false;
1836 if (!vect_is_simple_use (mask, stmt, loop_vinfo,
1837 &def_stmt, &def, &dt))
1838 return false;
1840 if (is_store)
1842 tree rhs = gimple_call_arg (stmt, 3);
1843 if (!vect_is_simple_use (rhs, stmt, loop_vinfo,
1844 &def_stmt, &def, &dt))
1845 return false;
1848 if (!vec_stmt) /* transformation not required. */
1850 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1851 if (is_store)
1852 vect_model_store_cost (stmt_info, ncopies, false, dt,
1853 NULL, NULL, NULL);
1854 else
1855 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1856 return true;
1859 /** Transform. **/
1861 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1863 tree vec_oprnd0 = NULL_TREE, op;
1864 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1865 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1866 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1867 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1868 tree mask_perm_mask = NULL_TREE;
1869 edge pe = loop_preheader_edge (loop);
1870 gimple_seq seq;
1871 basic_block new_bb;
1872 enum { NARROW, NONE, WIDEN } modifier;
1873 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1875 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1876 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1877 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1878 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1879 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1880 scaletype = TREE_VALUE (arglist);
1881 gcc_checking_assert (types_compatible_p (srctype, rettype)
1882 && types_compatible_p (srctype, masktype));
1884 if (nunits == gather_off_nunits)
1885 modifier = NONE;
1886 else if (nunits == gather_off_nunits / 2)
1888 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1889 modifier = WIDEN;
1891 for (i = 0; i < gather_off_nunits; ++i)
1892 sel[i] = i | nunits;
1894 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1896 else if (nunits == gather_off_nunits * 2)
1898 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1899 modifier = NARROW;
1901 for (i = 0; i < nunits; ++i)
1902 sel[i] = i < gather_off_nunits
1903 ? i : i + nunits - gather_off_nunits;
1905 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1906 ncopies *= 2;
1907 for (i = 0; i < nunits; ++i)
1908 sel[i] = i | gather_off_nunits;
1909 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1911 else
1912 gcc_unreachable ();
1914 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1916 ptr = fold_convert (ptrtype, gather_base);
1917 if (!is_gimple_min_invariant (ptr))
1919 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1920 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1921 gcc_assert (!new_bb);
1924 scale = build_int_cst (scaletype, gather_scale);
1926 prev_stmt_info = NULL;
1927 for (j = 0; j < ncopies; ++j)
1929 if (modifier == WIDEN && (j & 1))
1930 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1931 perm_mask, stmt, gsi);
1932 else if (j == 0)
1933 op = vec_oprnd0
1934 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1935 else
1936 op = vec_oprnd0
1937 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1939 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1941 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1942 == TYPE_VECTOR_SUBPARTS (idxtype));
1943 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1944 var = make_ssa_name (var);
1945 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1946 new_stmt
1947 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1948 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1949 op = var;
1952 if (mask_perm_mask && (j & 1))
1953 mask_op = permute_vec_elements (mask_op, mask_op,
1954 mask_perm_mask, stmt, gsi);
1955 else
1957 if (j == 0)
1958 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1959 else
1961 vect_is_simple_use (vec_mask, NULL, loop_vinfo,
1962 &def_stmt, &def, &dt);
1963 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1966 mask_op = vec_mask;
1967 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1969 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1970 == TYPE_VECTOR_SUBPARTS (masktype));
1971 var = vect_get_new_vect_var (masktype, vect_simple_var,
1972 NULL);
1973 var = make_ssa_name (var);
1974 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1975 new_stmt
1976 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1978 mask_op = var;
1982 new_stmt
1983 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1984 scale);
1986 if (!useless_type_conversion_p (vectype, rettype))
1988 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1989 == TYPE_VECTOR_SUBPARTS (rettype));
1990 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
1991 op = make_ssa_name (var, new_stmt);
1992 gimple_call_set_lhs (new_stmt, op);
1993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1994 var = make_ssa_name (vec_dest);
1995 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1996 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1998 else
2000 var = make_ssa_name (vec_dest, new_stmt);
2001 gimple_call_set_lhs (new_stmt, var);
2004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2006 if (modifier == NARROW)
2008 if ((j & 1) == 0)
2010 prev_res = var;
2011 continue;
2013 var = permute_vec_elements (prev_res, var,
2014 perm_mask, stmt, gsi);
2015 new_stmt = SSA_NAME_DEF_STMT (var);
2018 if (prev_stmt_info == NULL)
2019 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2020 else
2021 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2022 prev_stmt_info = vinfo_for_stmt (new_stmt);
2025 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2026 from the IL. */
2027 tree lhs = gimple_call_lhs (stmt);
2028 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2029 set_vinfo_for_stmt (new_stmt, stmt_info);
2030 set_vinfo_for_stmt (stmt, NULL);
2031 STMT_VINFO_STMT (stmt_info) = new_stmt;
2032 gsi_replace (gsi, new_stmt, true);
2033 return true;
2035 else if (is_store)
2037 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2038 prev_stmt_info = NULL;
2039 for (i = 0; i < ncopies; i++)
2041 unsigned align, misalign;
2043 if (i == 0)
2045 tree rhs = gimple_call_arg (stmt, 3);
2046 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2047 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2048 /* We should have caught mismatched types earlier. */
2049 gcc_assert (useless_type_conversion_p (vectype,
2050 TREE_TYPE (vec_rhs)));
2051 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2052 NULL_TREE, &dummy, gsi,
2053 &ptr_incr, false, &inv_p);
2054 gcc_assert (!inv_p);
2056 else
2058 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, &def_stmt,
2059 &def, &dt);
2060 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2061 vect_is_simple_use (vec_mask, NULL, loop_vinfo, &def_stmt,
2062 &def, &dt);
2063 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2064 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2065 TYPE_SIZE_UNIT (vectype));
2068 align = TYPE_ALIGN_UNIT (vectype);
2069 if (aligned_access_p (dr))
2070 misalign = 0;
2071 else if (DR_MISALIGNMENT (dr) == -1)
2073 align = TYPE_ALIGN_UNIT (elem_type);
2074 misalign = 0;
2076 else
2077 misalign = DR_MISALIGNMENT (dr);
2078 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2079 misalign);
2080 new_stmt
2081 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2082 gimple_call_arg (stmt, 1),
2083 vec_mask, vec_rhs);
2084 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2085 if (i == 0)
2086 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2087 else
2088 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2089 prev_stmt_info = vinfo_for_stmt (new_stmt);
2092 else
2094 tree vec_mask = NULL_TREE;
2095 prev_stmt_info = NULL;
2096 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2097 for (i = 0; i < ncopies; i++)
2099 unsigned align, misalign;
2101 if (i == 0)
2103 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2104 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2105 NULL_TREE, &dummy, gsi,
2106 &ptr_incr, false, &inv_p);
2107 gcc_assert (!inv_p);
2109 else
2111 vect_is_simple_use (vec_mask, NULL, loop_vinfo, &def_stmt,
2112 &def, &dt);
2113 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2114 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2115 TYPE_SIZE_UNIT (vectype));
2118 align = TYPE_ALIGN_UNIT (vectype);
2119 if (aligned_access_p (dr))
2120 misalign = 0;
2121 else if (DR_MISALIGNMENT (dr) == -1)
2123 align = TYPE_ALIGN_UNIT (elem_type);
2124 misalign = 0;
2126 else
2127 misalign = DR_MISALIGNMENT (dr);
2128 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2129 misalign);
2130 new_stmt
2131 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2132 gimple_call_arg (stmt, 1),
2133 vec_mask);
2134 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2136 if (i == 0)
2137 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2138 else
2139 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2140 prev_stmt_info = vinfo_for_stmt (new_stmt);
2144 if (!is_store)
2146 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2147 from the IL. */
2148 tree lhs = gimple_call_lhs (stmt);
2149 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2150 set_vinfo_for_stmt (new_stmt, stmt_info);
2151 set_vinfo_for_stmt (stmt, NULL);
2152 STMT_VINFO_STMT (stmt_info) = new_stmt;
2153 gsi_replace (gsi, new_stmt, true);
2156 return true;
2160 /* Function vectorizable_call.
2162 Check if GS performs a function call that can be vectorized.
2163 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2164 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2165 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
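/* As a sketch (the builtin name is hypothetical), with a vectorization
   factor of 4 a scalar call

     a_1 = sqrtf (b_2);

   may be replaced by NCOPIES copies of a call to a target vector builtin,
   e.g.

     vect_a.4 = __builtin_vec_sqrtf4 (vect_b.3);

   provided vectorizable_function finds such a builtin for the input and
   output vector types.  */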
2167 static bool
2168 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2169 slp_tree slp_node)
2171 gcall *stmt;
2172 tree vec_dest;
2173 tree scalar_dest;
2174 tree op, type;
2175 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2176 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2177 tree vectype_out, vectype_in;
2178 int nunits_in;
2179 int nunits_out;
2180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2181 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2182 vec_info *vinfo = stmt_info->vinfo;
2183 tree fndecl, new_temp, def, rhs_type;
2184 gimple *def_stmt;
2185 enum vect_def_type dt[3]
2186 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2187 gimple *new_stmt = NULL;
2188 int ncopies, j;
2189 vec<tree> vargs = vNULL;
2190 enum { NARROW, NONE, WIDEN } modifier;
2191 size_t i, nargs;
2192 tree lhs;
2194 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2195 return false;
2197 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2198 return false;
2200 /* Is GS a vectorizable call? */
2201 stmt = dyn_cast <gcall *> (gs);
2202 if (!stmt)
2203 return false;
2205 if (gimple_call_internal_p (stmt)
2206 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2207 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2208 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2209 slp_node);
2211 if (gimple_call_lhs (stmt) == NULL_TREE
2212 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2213 return false;
2215 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2217 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2219 /* Process function arguments. */
2220 rhs_type = NULL_TREE;
2221 vectype_in = NULL_TREE;
2222 nargs = gimple_call_num_args (stmt);
2224 /* Bail out if the function has more than three arguments; we do not have
2225 interesting builtin functions to vectorize with more than two arguments
2226 except for fma. Having no arguments is not good either. */
2227 if (nargs == 0 || nargs > 3)
2228 return false;
2230 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2231 if (gimple_call_internal_p (stmt)
2232 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2234 nargs = 0;
2235 rhs_type = unsigned_type_node;
2238 for (i = 0; i < nargs; i++)
2240 tree opvectype;
2242 op = gimple_call_arg (stmt, i);
2244 /* We can only handle calls with arguments of the same type. */
2245 if (rhs_type
2246 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2248 if (dump_enabled_p ())
2249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2250 "argument types differ.\n");
2251 return false;
2253 if (!rhs_type)
2254 rhs_type = TREE_TYPE (op);
2256 if (!vect_is_simple_use_1 (op, stmt, vinfo,
2257 &def_stmt, &def, &dt[i], &opvectype))
2259 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2261 "use not simple.\n");
2262 return false;
2265 if (!vectype_in)
2266 vectype_in = opvectype;
2267 else if (opvectype
2268 && opvectype != vectype_in)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "argument vector types differ.\n");
2273 return false;
2276 /* If all arguments are external or constant defs, use a vector type with
2277 the same size as the output vector type. */
2278 if (!vectype_in)
2279 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2280 if (vec_stmt)
2281 gcc_assert (vectype_in);
2282 if (!vectype_in)
2284 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "no vectype for scalar type ");
2288 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2289 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2292 return false;
2295 /* FORNOW */
2296 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2297 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2298 if (nunits_in == nunits_out / 2)
2299 modifier = NARROW;
2300 else if (nunits_out == nunits_in)
2301 modifier = NONE;
2302 else if (nunits_out == nunits_in / 2)
2303 modifier = WIDEN;
2304 else
2305 return false;
2307 /* For now, we only vectorize functions if a target specific builtin
2308 is available. TODO -- in some cases, it might be profitable to
2309 insert the calls for pieces of the vector, in order to be able
2310 to vectorize other operations in the loop. */
2311 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2312 if (fndecl == NULL_TREE)
2314 if (gimple_call_internal_p (stmt)
2315 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2316 && !slp_node
2317 && loop_vinfo
2318 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2319 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2320 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2321 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2323 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2324 { 0, 1, 2, ... vf - 1 } vector. */
2325 gcc_assert (nargs == 0);
2327 else
2329 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2331 "function is not vectorizable.\n");
2332 return false;
2336 gcc_assert (!gimple_vuse (stmt));
2338 if (slp_node || PURE_SLP_STMT (stmt_info))
2339 ncopies = 1;
2340 else if (modifier == NARROW)
2341 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2342 else
2343 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2345 /* Sanity check: make sure that at least one copy of the vectorized stmt
2346 needs to be generated. */
2347 gcc_assert (ncopies >= 1);
2349 if (!vec_stmt) /* transformation not required. */
2351 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2352 if (dump_enabled_p ())
2353 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2354 "\n");
2355 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2356 return true;
2359 /** Transform. **/
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2364 /* Handle def. */
2365 scalar_dest = gimple_call_lhs (stmt);
2366 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2368 prev_stmt_info = NULL;
2369 switch (modifier)
2371 case NONE:
2372 for (j = 0; j < ncopies; ++j)
2374 /* Build argument list for the vectorized call. */
2375 if (j == 0)
2376 vargs.create (nargs);
2377 else
2378 vargs.truncate (0);
2380 if (slp_node)
2382 auto_vec<vec<tree> > vec_defs (nargs);
2383 vec<tree> vec_oprnds0;
2385 for (i = 0; i < nargs; i++)
2386 vargs.quick_push (gimple_call_arg (stmt, i));
2387 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2388 vec_oprnds0 = vec_defs[0];
2390 /* Arguments are ready. Create the new vector stmt. */
2391 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2393 size_t k;
2394 for (k = 0; k < nargs; k++)
2396 vec<tree> vec_oprndsk = vec_defs[k];
2397 vargs[k] = vec_oprndsk[i];
2399 new_stmt = gimple_build_call_vec (fndecl, vargs);
2400 new_temp = make_ssa_name (vec_dest, new_stmt);
2401 gimple_call_set_lhs (new_stmt, new_temp);
2402 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2403 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2406 for (i = 0; i < nargs; i++)
2408 vec<tree> vec_oprndsi = vec_defs[i];
2409 vec_oprndsi.release ();
2411 continue;
2414 for (i = 0; i < nargs; i++)
2416 op = gimple_call_arg (stmt, i);
2417 if (j == 0)
2418 vec_oprnd0
2419 = vect_get_vec_def_for_operand (op, stmt, NULL);
2420 else
2422 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2423 vec_oprnd0
2424 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2427 vargs.quick_push (vec_oprnd0);
2430 if (gimple_call_internal_p (stmt)
2431 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2433 tree *v = XALLOCAVEC (tree, nunits_out);
2434 int k;
2435 for (k = 0; k < nunits_out; ++k)
2436 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2437 tree cst = build_vector (vectype_out, v);
2438 tree new_var
2439 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2440 gimple *init_stmt = gimple_build_assign (new_var, cst);
2441 new_temp = make_ssa_name (new_var, init_stmt);
2442 gimple_assign_set_lhs (init_stmt, new_temp);
2443 vect_init_vector_1 (stmt, init_stmt, NULL);
2444 new_temp = make_ssa_name (vec_dest);
2445 new_stmt = gimple_build_assign (new_temp,
2446 gimple_assign_lhs (init_stmt));
2448 else
2450 new_stmt = gimple_build_call_vec (fndecl, vargs);
2451 new_temp = make_ssa_name (vec_dest, new_stmt);
2452 gimple_call_set_lhs (new_stmt, new_temp);
2454 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2456 if (j == 0)
2457 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2458 else
2459 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2461 prev_stmt_info = vinfo_for_stmt (new_stmt);
2464 break;
2466 case NARROW:
2467 for (j = 0; j < ncopies; ++j)
2469 /* Build argument list for the vectorized call. */
2470 if (j == 0)
2471 vargs.create (nargs * 2);
2472 else
2473 vargs.truncate (0);
2475 if (slp_node)
2477 auto_vec<vec<tree> > vec_defs (nargs);
2478 vec<tree> vec_oprnds0;
2480 for (i = 0; i < nargs; i++)
2481 vargs.quick_push (gimple_call_arg (stmt, i));
2482 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2483 vec_oprnds0 = vec_defs[0];
2485 /* Arguments are ready. Create the new vector stmt. */
2486 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2488 size_t k;
2489 vargs.truncate (0);
2490 for (k = 0; k < nargs; k++)
2492 vec<tree> vec_oprndsk = vec_defs[k];
2493 vargs.quick_push (vec_oprndsk[i]);
2494 vargs.quick_push (vec_oprndsk[i + 1]);
2496 new_stmt = gimple_build_call_vec (fndecl, vargs);
2497 new_temp = make_ssa_name (vec_dest, new_stmt);
2498 gimple_call_set_lhs (new_stmt, new_temp);
2499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2500 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2503 for (i = 0; i < nargs; i++)
2505 vec<tree> vec_oprndsi = vec_defs[i];
2506 vec_oprndsi.release ();
2508 continue;
2511 for (i = 0; i < nargs; i++)
2513 op = gimple_call_arg (stmt, i);
2514 if (j == 0)
2516 vec_oprnd0
2517 = vect_get_vec_def_for_operand (op, stmt, NULL);
2518 vec_oprnd1
2519 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2521 else
2523 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2524 vec_oprnd0
2525 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2526 vec_oprnd1
2527 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2530 vargs.quick_push (vec_oprnd0);
2531 vargs.quick_push (vec_oprnd1);
2534 new_stmt = gimple_build_call_vec (fndecl, vargs);
2535 new_temp = make_ssa_name (vec_dest, new_stmt);
2536 gimple_call_set_lhs (new_stmt, new_temp);
2537 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2539 if (j == 0)
2540 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2541 else
2542 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2544 prev_stmt_info = vinfo_for_stmt (new_stmt);
2547 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2549 break;
2551 case WIDEN:
2552 /* No current target implements this case. */
2553 return false;
2556 vargs.release ();
2558 /* The call in STMT might prevent it from being removed in dce.
2559 We cannot, however, remove it here, due to the way the ssa name
2560 it defines is mapped to the new definition. So just replace the
2561 rhs of the statement with something harmless. */
2563 if (slp_node)
2564 return true;
2566 type = TREE_TYPE (scalar_dest);
2567 if (is_pattern_stmt_p (stmt_info))
2568 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2569 else
2570 lhs = gimple_call_lhs (stmt);
2572 if (gimple_call_internal_p (stmt)
2573 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2575 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2576 with vf - 1 rather than 0, i.e. the last iteration of the
2577 vectorized loop. */
2578 imm_use_iterator iter;
2579 use_operand_p use_p;
2580 gimple *use_stmt;
2581 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2583 basic_block use_bb = gimple_bb (use_stmt);
2584 if (use_bb
2585 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2587 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2588 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2589 ncopies * nunits_out - 1));
2590 update_stmt (use_stmt);
2595 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2596 set_vinfo_for_stmt (new_stmt, stmt_info);
2597 set_vinfo_for_stmt (stmt, NULL);
2598 STMT_VINFO_STMT (stmt_info) = new_stmt;
2599 gsi_replace (gsi, new_stmt, false);
2601 return true;
2605 struct simd_call_arg_info
2607 tree vectype;
2608 tree op;
2609 enum vect_def_type dt;
2610 HOST_WIDE_INT linear_step;
2611 unsigned int align;
2612 bool simd_lane_linear;
2615 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2616 is linear within a simd lane (but not within the whole loop), note it in
2617 *ARGINFO. */
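/* A made-up example of the pattern recognized here:

     _1 = GOMP_SIMD_LANE (simduid.0_4);
     _2 = _1 * 8;
     p_3 = &base + _2;

   (possibly with intervening conversions): OP == p_3 is linear within a
   simd lane with base &base and linear_step 8, even though it is not an
   affine induction variable of the whole loop.  */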
2619 static void
2620 vect_simd_lane_linear (tree op, struct loop *loop,
2621 struct simd_call_arg_info *arginfo)
2623 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2625 if (!is_gimple_assign (def_stmt)
2626 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2627 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2628 return;
2630 tree base = gimple_assign_rhs1 (def_stmt);
2631 HOST_WIDE_INT linear_step = 0;
2632 tree v = gimple_assign_rhs2 (def_stmt);
2633 while (TREE_CODE (v) == SSA_NAME)
2635 tree t;
2636 def_stmt = SSA_NAME_DEF_STMT (v);
2637 if (is_gimple_assign (def_stmt))
2638 switch (gimple_assign_rhs_code (def_stmt))
2640 case PLUS_EXPR:
2641 t = gimple_assign_rhs2 (def_stmt);
2642 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2643 return;
2644 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2645 v = gimple_assign_rhs1 (def_stmt);
2646 continue;
2647 case MULT_EXPR:
2648 t = gimple_assign_rhs2 (def_stmt);
2649 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2650 return;
2651 linear_step = tree_to_shwi (t);
2652 v = gimple_assign_rhs1 (def_stmt);
2653 continue;
2654 CASE_CONVERT:
2655 t = gimple_assign_rhs1 (def_stmt);
2656 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2657 || (TYPE_PRECISION (TREE_TYPE (v))
2658 < TYPE_PRECISION (TREE_TYPE (t))))
2659 return;
2660 if (!linear_step)
2661 linear_step = 1;
2662 v = t;
2663 continue;
2664 default:
2665 return;
2667 else if (is_gimple_call (def_stmt)
2668 && gimple_call_internal_p (def_stmt)
2669 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2670 && loop->simduid
2671 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2672 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2673 == loop->simduid))
2675 if (!linear_step)
2676 linear_step = 1;
2677 arginfo->linear_step = linear_step;
2678 arginfo->op = base;
2679 arginfo->simd_lane_linear = true;
2680 return;
2685 /* Function vectorizable_simd_clone_call.
2687 Check if STMT performs a function call that can be vectorized
2688 by calling a simd clone of the function.
2689 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2690 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2691 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
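/* As a sketch (the clone name is only illustrative), for a function foo
   declared with "#pragma omp declare simd" a scalar call

     x_1 = foo (y_2, n_3);

   may be replaced by a call to one of foo's simd clones operating on
   whole vectors, e.g.

     vect_x.5 = _ZGVbN4vu_foo (vect_y.4, n_3);

   where vector arguments are passed per lane and uniform arguments are
   passed through unchanged.  */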
2693 static bool
2694 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2695 gimple **vec_stmt, slp_tree slp_node)
2697 tree vec_dest;
2698 tree scalar_dest;
2699 tree op, type;
2700 tree vec_oprnd0 = NULL_TREE;
2701 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2702 tree vectype;
2703 unsigned int nunits;
2704 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2705 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2706 vec_info *vinfo = stmt_info->vinfo;
2707 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2708 tree fndecl, new_temp, def;
2709 gimple *def_stmt;
2710 gimple *new_stmt = NULL;
2711 int ncopies, j;
2712 vec<simd_call_arg_info> arginfo = vNULL;
2713 vec<tree> vargs = vNULL;
2714 size_t i, nargs;
2715 tree lhs, rtype, ratype;
2716 vec<constructor_elt, va_gc> *ret_ctor_elts;
2718 /* Is STMT a vectorizable call? */
2719 if (!is_gimple_call (stmt))
2720 return false;
2722 fndecl = gimple_call_fndecl (stmt);
2723 if (fndecl == NULL_TREE)
2724 return false;
2726 struct cgraph_node *node = cgraph_node::get (fndecl);
2727 if (node == NULL || node->simd_clones == NULL)
2728 return false;
2730 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2731 return false;
2733 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2734 return false;
2736 if (gimple_call_lhs (stmt)
2737 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2738 return false;
2740 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2742 vectype = STMT_VINFO_VECTYPE (stmt_info);
2744 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2745 return false;
2747 /* FORNOW */
2748 if (slp_node || PURE_SLP_STMT (stmt_info))
2749 return false;
2751 /* Process function arguments. */
2752 nargs = gimple_call_num_args (stmt);
2754 /* Bail out if the function has zero arguments. */
2755 if (nargs == 0)
2756 return false;
2758 arginfo.create (nargs);
2760 for (i = 0; i < nargs; i++)
2762 simd_call_arg_info thisarginfo;
2763 affine_iv iv;
2765 thisarginfo.linear_step = 0;
2766 thisarginfo.align = 0;
2767 thisarginfo.op = NULL_TREE;
2768 thisarginfo.simd_lane_linear = false;
2770 op = gimple_call_arg (stmt, i);
2771 if (!vect_is_simple_use_1 (op, stmt, vinfo,
2772 &def_stmt, &def, &thisarginfo.dt,
2773 &thisarginfo.vectype)
2774 || thisarginfo.dt == vect_uninitialized_def)
2776 if (dump_enabled_p ())
2777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2778 "use not simple.\n");
2779 arginfo.release ();
2780 return false;
2783 if (thisarginfo.dt == vect_constant_def
2784 || thisarginfo.dt == vect_external_def)
2785 gcc_assert (thisarginfo.vectype == NULL_TREE);
2786 else
2787 gcc_assert (thisarginfo.vectype != NULL_TREE);
2789 /* For linear arguments, the analyze phase should have saved
2790 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
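/* Concretely (matching the indexing below and in the transform branch
   further down), slot 0 of that vector holds the selected clone's decl
   and, for argument I, slots I*3 + 1, I*3 + 2 and I*3 + 3 hold the base,
   the linear step and a boolean simd-lane-linear flag respectively.  */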
2791 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2792 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2794 gcc_assert (vec_stmt);
2795 thisarginfo.linear_step
2796 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2797 thisarginfo.op
2798 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2799 thisarginfo.simd_lane_linear
2800 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2801 == boolean_true_node);
2802 /* If the loop has been peeled for alignment, we need to adjust it. */
2803 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2804 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2805 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2807 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2808 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2809 tree opt = TREE_TYPE (thisarginfo.op);
2810 bias = fold_convert (TREE_TYPE (step), bias);
2811 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2812 thisarginfo.op
2813 = fold_build2 (POINTER_TYPE_P (opt)
2814 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2815 thisarginfo.op, bias);
2818 else if (!vec_stmt
2819 && thisarginfo.dt != vect_constant_def
2820 && thisarginfo.dt != vect_external_def
2821 && loop_vinfo
2822 && TREE_CODE (op) == SSA_NAME
2823 && simple_iv (loop, loop_containing_stmt (stmt), op,
2824 &iv, false)
2825 && tree_fits_shwi_p (iv.step))
2827 thisarginfo.linear_step = tree_to_shwi (iv.step);
2828 thisarginfo.op = iv.base;
2830 else if ((thisarginfo.dt == vect_constant_def
2831 || thisarginfo.dt == vect_external_def)
2832 && POINTER_TYPE_P (TREE_TYPE (op)))
2833 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2834 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2835 linear too. */
2836 if (POINTER_TYPE_P (TREE_TYPE (op))
2837 && !thisarginfo.linear_step
2838 && !vec_stmt
2839 && thisarginfo.dt != vect_constant_def
2840 && thisarginfo.dt != vect_external_def
2841 && loop_vinfo
2842 && !slp_node
2843 && TREE_CODE (op) == SSA_NAME)
2844 vect_simd_lane_linear (op, loop, &thisarginfo);
2846 arginfo.quick_push (thisarginfo);
2849 unsigned int badness = 0;
2850 struct cgraph_node *bestn = NULL;
2851 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2852 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2853 else
2854 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2855 n = n->simdclone->next_clone)
2857 unsigned int this_badness = 0;
2858 if (n->simdclone->simdlen
2859 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2860 || n->simdclone->nargs != nargs)
2861 continue;
2862 if (n->simdclone->simdlen
2863 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2864 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2865 - exact_log2 (n->simdclone->simdlen)) * 1024;
2866 if (n->simdclone->inbranch)
2867 this_badness += 2048;
2868 int target_badness = targetm.simd_clone.usable (n);
2869 if (target_badness < 0)
2870 continue;
2871 this_badness += target_badness * 512;
2872 /* FORNOW: Have to add code to add the mask argument. */
2873 if (n->simdclone->inbranch)
2874 continue;
2875 for (i = 0; i < nargs; i++)
2877 switch (n->simdclone->args[i].arg_type)
2879 case SIMD_CLONE_ARG_TYPE_VECTOR:
2880 if (!useless_type_conversion_p
2881 (n->simdclone->args[i].orig_type,
2882 TREE_TYPE (gimple_call_arg (stmt, i))))
2883 i = -1;
2884 else if (arginfo[i].dt == vect_constant_def
2885 || arginfo[i].dt == vect_external_def
2886 || arginfo[i].linear_step)
2887 this_badness += 64;
2888 break;
2889 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2890 if (arginfo[i].dt != vect_constant_def
2891 && arginfo[i].dt != vect_external_def)
2892 i = -1;
2893 break;
2894 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2895 if (arginfo[i].dt == vect_constant_def
2896 || arginfo[i].dt == vect_external_def
2897 || (arginfo[i].linear_step
2898 != n->simdclone->args[i].linear_step))
2899 i = -1;
2900 break;
2901 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2902 /* FORNOW */
2903 i = -1;
2904 break;
2905 case SIMD_CLONE_ARG_TYPE_MASK:
2906 gcc_unreachable ();
2908 if (i == (size_t) -1)
2909 break;
2910 if (n->simdclone->args[i].alignment > arginfo[i].align)
2912 i = -1;
2913 break;
2915 if (arginfo[i].align)
2916 this_badness += (exact_log2 (arginfo[i].align)
2917 - exact_log2 (n->simdclone->args[i].alignment));
2919 if (i == (size_t) -1)
2920 continue;
2921 if (bestn == NULL || this_badness < badness)
2923 bestn = n;
2924 badness = this_badness;
2928 if (bestn == NULL)
2930 arginfo.release ();
2931 return false;
2934 for (i = 0; i < nargs; i++)
2935 if ((arginfo[i].dt == vect_constant_def
2936 || arginfo[i].dt == vect_external_def)
2937 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2939 arginfo[i].vectype
2940 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2941 i)));
2942 if (arginfo[i].vectype == NULL
2943 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2944 > bestn->simdclone->simdlen))
2946 arginfo.release ();
2947 return false;
2951 fndecl = bestn->decl;
2952 nunits = bestn->simdclone->simdlen;
2953 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2955 /* If the function isn't const, only allow it in simd loops where the user
2956 has asserted that at least nunits consecutive iterations can be
2957 performed using SIMD instructions. */
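/* For example, a loop annotated with "#pragma omp simd safelen(8)" has
   loop->safelen == 8, so a non-const clone is still usable here as long
   as its simdlen is at most 8.  */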
2958 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2959 && gimple_vuse (stmt))
2961 arginfo.release ();
2962 return false;
2965 /* Sanity check: make sure that at least one copy of the vectorized stmt
2966 needs to be generated. */
2967 gcc_assert (ncopies >= 1);
2969 if (!vec_stmt) /* transformation not required. */
2971 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2972 for (i = 0; i < nargs; i++)
2973 if (bestn->simdclone->args[i].arg_type
2974 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2976 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
2977 + 1);
2978 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2979 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2980 ? size_type_node : TREE_TYPE (arginfo[i].op);
2981 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2982 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2983 tree sll = arginfo[i].simd_lane_linear
2984 ? boolean_true_node : boolean_false_node;
2985 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
2987 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2988 if (dump_enabled_p ())
2989 dump_printf_loc (MSG_NOTE, vect_location,
2990 "=== vectorizable_simd_clone_call ===\n");
2991 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2992 arginfo.release ();
2993 return true;
2996 /** Transform. **/
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3001 /* Handle def. */
3002 scalar_dest = gimple_call_lhs (stmt);
3003 vec_dest = NULL_TREE;
3004 rtype = NULL_TREE;
3005 ratype = NULL_TREE;
3006 if (scalar_dest)
3008 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3009 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3010 if (TREE_CODE (rtype) == ARRAY_TYPE)
3012 ratype = rtype;
3013 rtype = TREE_TYPE (ratype);
3017 prev_stmt_info = NULL;
3018 for (j = 0; j < ncopies; ++j)
3020 /* Build argument list for the vectorized call. */
3021 if (j == 0)
3022 vargs.create (nargs);
3023 else
3024 vargs.truncate (0);
3026 for (i = 0; i < nargs; i++)
3028 unsigned int k, l, m, o;
3029 tree atype;
3030 op = gimple_call_arg (stmt, i);
3031 switch (bestn->simdclone->args[i].arg_type)
3033 case SIMD_CLONE_ARG_TYPE_VECTOR:
3034 atype = bestn->simdclone->args[i].vector_type;
3035 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3036 for (m = j * o; m < (j + 1) * o; m++)
3038 if (TYPE_VECTOR_SUBPARTS (atype)
3039 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3041 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3042 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3043 / TYPE_VECTOR_SUBPARTS (atype));
3044 gcc_assert ((k & (k - 1)) == 0);
3045 if (m == 0)
3046 vec_oprnd0
3047 = vect_get_vec_def_for_operand (op, stmt, NULL);
3048 else
3050 vec_oprnd0 = arginfo[i].op;
3051 if ((m & (k - 1)) == 0)
3052 vec_oprnd0
3053 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3054 vec_oprnd0);
3056 arginfo[i].op = vec_oprnd0;
3057 vec_oprnd0
3058 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3059 size_int (prec),
3060 bitsize_int ((m & (k - 1)) * prec));
3061 new_stmt
3062 = gimple_build_assign (make_ssa_name (atype),
3063 vec_oprnd0);
3064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3065 vargs.safe_push (gimple_assign_lhs (new_stmt));
3067 else
3069 k = (TYPE_VECTOR_SUBPARTS (atype)
3070 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3071 gcc_assert ((k & (k - 1)) == 0);
3072 vec<constructor_elt, va_gc> *ctor_elts;
3073 if (k != 1)
3074 vec_alloc (ctor_elts, k);
3075 else
3076 ctor_elts = NULL;
3077 for (l = 0; l < k; l++)
3079 if (m == 0 && l == 0)
3080 vec_oprnd0
3081 = vect_get_vec_def_for_operand (op, stmt, NULL);
3082 else
3083 vec_oprnd0
3084 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3085 arginfo[i].op);
3086 arginfo[i].op = vec_oprnd0;
3087 if (k == 1)
3088 break;
3089 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3090 vec_oprnd0);
3092 if (k == 1)
3093 vargs.safe_push (vec_oprnd0);
3094 else
3096 vec_oprnd0 = build_constructor (atype, ctor_elts);
3097 new_stmt
3098 = gimple_build_assign (make_ssa_name (atype),
3099 vec_oprnd0);
3100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3101 vargs.safe_push (gimple_assign_lhs (new_stmt));
3105 break;
3106 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3107 vargs.safe_push (op);
3108 break;
3109 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3110 if (j == 0)
3112 gimple_seq stmts;
3113 arginfo[i].op
3114 = force_gimple_operand (arginfo[i].op, &stmts, true,
3115 NULL_TREE);
3116 if (stmts != NULL)
3118 basic_block new_bb;
3119 edge pe = loop_preheader_edge (loop);
3120 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3121 gcc_assert (!new_bb);
3123 if (arginfo[i].simd_lane_linear)
3125 vargs.safe_push (arginfo[i].op);
3126 break;
3128 tree phi_res = copy_ssa_name (op);
3129 gphi *new_phi = create_phi_node (phi_res, loop->header);
3130 set_vinfo_for_stmt (new_phi,
3131 new_stmt_vec_info (new_phi, loop_vinfo));
3132 add_phi_arg (new_phi, arginfo[i].op,
3133 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3134 enum tree_code code
3135 = POINTER_TYPE_P (TREE_TYPE (op))
3136 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3137 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3138 ? sizetype : TREE_TYPE (op);
3139 widest_int cst
3140 = wi::mul (bestn->simdclone->args[i].linear_step,
3141 ncopies * nunits);
3142 tree tcst = wide_int_to_tree (type, cst);
3143 tree phi_arg = copy_ssa_name (op);
3144 new_stmt
3145 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3146 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3147 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3148 set_vinfo_for_stmt (new_stmt,
3149 new_stmt_vec_info (new_stmt, loop_vinfo));
3150 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3151 UNKNOWN_LOCATION);
3152 arginfo[i].op = phi_res;
3153 vargs.safe_push (phi_res);
3155 else
3157 enum tree_code code
3158 = POINTER_TYPE_P (TREE_TYPE (op))
3159 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3160 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3161 ? sizetype : TREE_TYPE (op);
3162 widest_int cst
3163 = wi::mul (bestn->simdclone->args[i].linear_step,
3164 j * nunits);
3165 tree tcst = wide_int_to_tree (type, cst);
3166 new_temp = make_ssa_name (TREE_TYPE (op));
3167 new_stmt = gimple_build_assign (new_temp, code,
3168 arginfo[i].op, tcst);
3169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3170 vargs.safe_push (new_temp);
3172 break;
3173 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3174 default:
3175 gcc_unreachable ();
3179 new_stmt = gimple_build_call_vec (fndecl, vargs);
3180 if (vec_dest)
3182 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3183 if (ratype)
3184 new_temp = create_tmp_var (ratype);
3185 else if (TYPE_VECTOR_SUBPARTS (vectype)
3186 == TYPE_VECTOR_SUBPARTS (rtype))
3187 new_temp = make_ssa_name (vec_dest, new_stmt);
3188 else
3189 new_temp = make_ssa_name (rtype, new_stmt);
3190 gimple_call_set_lhs (new_stmt, new_temp);
3192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3194 if (vec_dest)
3196 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3198 unsigned int k, l;
3199 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3200 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3201 gcc_assert ((k & (k - 1)) == 0);
3202 for (l = 0; l < k; l++)
3204 tree t;
3205 if (ratype)
3207 t = build_fold_addr_expr (new_temp);
3208 t = build2 (MEM_REF, vectype, t,
3209 build_int_cst (TREE_TYPE (t),
3210 l * prec / BITS_PER_UNIT));
3212 else
3213 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3214 size_int (prec), bitsize_int (l * prec));
3215 new_stmt
3216 = gimple_build_assign (make_ssa_name (vectype), t);
3217 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3218 if (j == 0 && l == 0)
3219 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3220 else
3221 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3223 prev_stmt_info = vinfo_for_stmt (new_stmt);
3226 if (ratype)
3228 tree clobber = build_constructor (ratype, NULL);
3229 TREE_THIS_VOLATILE (clobber) = 1;
3230 new_stmt = gimple_build_assign (new_temp, clobber);
3231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3233 continue;
3235 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3237 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3238 / TYPE_VECTOR_SUBPARTS (rtype));
3239 gcc_assert ((k & (k - 1)) == 0);
3240 if ((j & (k - 1)) == 0)
3241 vec_alloc (ret_ctor_elts, k);
3242 if (ratype)
3244 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3245 for (m = 0; m < o; m++)
3247 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3248 size_int (m), NULL_TREE, NULL_TREE);
3249 new_stmt
3250 = gimple_build_assign (make_ssa_name (rtype), tem);
3251 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3252 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3253 gimple_assign_lhs (new_stmt));
3255 tree clobber = build_constructor (ratype, NULL);
3256 TREE_THIS_VOLATILE (clobber) = 1;
3257 new_stmt = gimple_build_assign (new_temp, clobber);
3258 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3260 else
3261 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3262 if ((j & (k - 1)) != k - 1)
3263 continue;
3264 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3265 new_stmt
3266 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3269 if ((unsigned) j == k - 1)
3270 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3271 else
3272 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3274 prev_stmt_info = vinfo_for_stmt (new_stmt);
3275 continue;
3277 else if (ratype)
3279 tree t = build_fold_addr_expr (new_temp);
3280 t = build2 (MEM_REF, vectype, t,
3281 build_int_cst (TREE_TYPE (t), 0));
3282 new_stmt
3283 = gimple_build_assign (make_ssa_name (vec_dest), t);
3284 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3285 tree clobber = build_constructor (ratype, NULL);
3286 TREE_THIS_VOLATILE (clobber) = 1;
3287 vect_finish_stmt_generation (stmt,
3288 gimple_build_assign (new_temp,
3289 clobber), gsi);
3293 if (j == 0)
3294 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3295 else
3296 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3298 prev_stmt_info = vinfo_for_stmt (new_stmt);
3301 vargs.release ();
3303 /* The call in STMT might prevent it from being removed in dce.
3304 We cannot, however, remove it here, due to the way the ssa name
3305 it defines is mapped to the new definition. So just replace the
3306 rhs of the statement with something harmless. */
3308 if (slp_node)
3309 return true;
3311 if (scalar_dest)
3313 type = TREE_TYPE (scalar_dest);
3314 if (is_pattern_stmt_p (stmt_info))
3315 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3316 else
3317 lhs = gimple_call_lhs (stmt);
3318 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3320 else
3321 new_stmt = gimple_build_nop ();
3322 set_vinfo_for_stmt (new_stmt, stmt_info);
3323 set_vinfo_for_stmt (stmt, NULL);
3324 STMT_VINFO_STMT (stmt_info) = new_stmt;
3325 gsi_replace (gsi, new_stmt, true);
3326 unlink_stmt_vdef (stmt);
3328 return true;
3332 /* Function vect_gen_widened_results_half
3334 Create a vector stmt whose code, number of arguments, and result
3335 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3336 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3337 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3338 needs to be created (DECL is a function-decl of a target-builtin).
3339 STMT is the original scalar stmt that we are vectorizing. */
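/* For example (SSA names made up), widening V8HI data to V4SI calls this
   function twice, once per half, producing something like

     vect_lo.6 = [vec_unpack_lo_expr] vect_x.5;
     vect_hi.7 = [vec_unpack_hi_expr] vect_x.5;

   which together form the widened result.  */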
3341 static gimple *
3342 vect_gen_widened_results_half (enum tree_code code,
3343 tree decl,
3344 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3345 tree vec_dest, gimple_stmt_iterator *gsi,
3346 gimple *stmt)
3348 gimple *new_stmt;
3349 tree new_temp;
3351 /* Generate half of the widened result: */
3352 if (code == CALL_EXPR)
3354 /* Target specific support */
3355 if (op_type == binary_op)
3356 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3357 else
3358 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3359 new_temp = make_ssa_name (vec_dest, new_stmt);
3360 gimple_call_set_lhs (new_stmt, new_temp);
3362 else
3364 /* Generic support */
3365 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3366 if (op_type != binary_op)
3367 vec_oprnd1 = NULL;
3368 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3369 new_temp = make_ssa_name (vec_dest, new_stmt);
3370 gimple_assign_set_lhs (new_stmt, new_temp);
3372 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3374 return new_stmt;
3378 /* Get vectorized definitions for loop-based vectorization. For the first
3379 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3380 scalar operand), and for the rest we get a copy with
3381 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3382 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3383 The vectors are collected into VEC_OPRNDS. */
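/* E.g. with MULTI_STEP_CVT == 1 this pushes four vector defs into
   VEC_OPRNDS: the def for the scalar *OPRND, its stmt-copy, and one more
   such pair from the recursive call, as needed for a two-step
   conversion.  */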
3385 static void
3386 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3387 vec<tree> *vec_oprnds, int multi_step_cvt)
3389 tree vec_oprnd;
3391 /* Get first vector operand. */
3392 /* All the vector operands except the very first one (that is scalar oprnd)
3393 are stmt copies. */
3394 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3395 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3396 else
3397 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3399 vec_oprnds->quick_push (vec_oprnd);
3401 /* Get second vector operand. */
3402 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3403 vec_oprnds->quick_push (vec_oprnd);
3405 *oprnd = vec_oprnd;
3407 /* For conversion in multiple steps, continue to get operands
3408 recursively. */
3409 if (multi_step_cvt)
3410 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3414 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3415 For multi-step conversions store the resulting vectors and call the function
3416 recursively. */
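/* For instance (names made up), demoting two V4SI operand vectors to one
   V8HI result pairs them up and emits

     vect_res.8 = VEC_PACK_TRUNC_EXPR <vect_x.6, vect_y.7>;

   For a multi-step demotion the packed results are collected and handed
   to a recursive call that packs them further.  */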
3418 static void
3419 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3420 int multi_step_cvt, gimple *stmt,
3421 vec<tree> vec_dsts,
3422 gimple_stmt_iterator *gsi,
3423 slp_tree slp_node, enum tree_code code,
3424 stmt_vec_info *prev_stmt_info)
3426 unsigned int i;
3427 tree vop0, vop1, new_tmp, vec_dest;
3428 gimple *new_stmt;
3429 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3431 vec_dest = vec_dsts.pop ();
3433 for (i = 0; i < vec_oprnds->length (); i += 2)
3435 /* Create demotion operation. */
3436 vop0 = (*vec_oprnds)[i];
3437 vop1 = (*vec_oprnds)[i + 1];
3438 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3439 new_tmp = make_ssa_name (vec_dest, new_stmt);
3440 gimple_assign_set_lhs (new_stmt, new_tmp);
3441 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3443 if (multi_step_cvt)
3444 /* Store the resulting vector for next recursive call. */
3445 (*vec_oprnds)[i/2] = new_tmp;
3446 else
3448 /* This is the last step of the conversion sequence. Store the
3449 vectors in SLP_NODE or in the vector info of the scalar statement
3450 (or in STMT_VINFO_RELATED_STMT chain). */
3451 if (slp_node)
3452 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3453 else
3455 if (!*prev_stmt_info)
3456 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3457 else
3458 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3460 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3465 /* For multi-step demotion operations we first generate demotion operations
3466 from the source type to the intermediate types, and then combine the
3467 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3468 type. */
3469 if (multi_step_cvt)
3471 /* At each level of recursion we have half of the operands we had at the
3472 previous level. */
3473 vec_oprnds->truncate ((i+1)/2);
3474 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3475 stmt, vec_dsts, gsi, slp_node,
3476 VEC_PACK_TRUNC_EXPR,
3477 prev_stmt_info);
3480 vec_dsts.quick_push (vec_dest);
3484 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3485 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3486 the resulting vectors and call the function recursively. */
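/* For instance (names made up), a WIDEN_MULT_EXPR from V8HI inputs to
   V4SI results is expanded here into two half operations per input pair:

     vect_lo.7 = VEC_WIDEN_MULT_LO_EXPR <vect_a.5, vect_b.6>;
     vect_hi.8 = VEC_WIDEN_MULT_HI_EXPR <vect_a.5, vect_b.6>;

   and both temporaries are pushed back into *VEC_OPRNDS0 for the
   caller.  */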
3488 static void
3489 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3490 vec<tree> *vec_oprnds1,
3491 gimple *stmt, tree vec_dest,
3492 gimple_stmt_iterator *gsi,
3493 enum tree_code code1,
3494 enum tree_code code2, tree decl1,
3495 tree decl2, int op_type)
3497 int i;
3498 tree vop0, vop1, new_tmp1, new_tmp2;
3499 gimple *new_stmt1, *new_stmt2;
3500 vec<tree> vec_tmp = vNULL;
3502 vec_tmp.create (vec_oprnds0->length () * 2);
3503 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3505 if (op_type == binary_op)
3506 vop1 = (*vec_oprnds1)[i];
3507 else
3508 vop1 = NULL_TREE;
3510 /* Generate the two halves of the promotion operation. */
3511 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3512 op_type, vec_dest, gsi, stmt);
3513 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3514 op_type, vec_dest, gsi, stmt);
3515 if (is_gimple_call (new_stmt1))
3517 new_tmp1 = gimple_call_lhs (new_stmt1);
3518 new_tmp2 = gimple_call_lhs (new_stmt2);
3520 else
3522 new_tmp1 = gimple_assign_lhs (new_stmt1);
3523 new_tmp2 = gimple_assign_lhs (new_stmt2);
3526 /* Store the results for the next step. */
3527 vec_tmp.quick_push (new_tmp1);
3528 vec_tmp.quick_push (new_tmp2);
3531 vec_oprnds0->release ();
3532 *vec_oprnds0 = vec_tmp;
3536 /* Check if STMT performs a conversion operation that can be vectorized.
3537 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3538 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3539 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
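/* For instance, d_1 = (double) i_2; with V4SI input and V2DF output
   doubles the element size, so it is classified as WIDEN below and each
   input vector produces two result vectors (roughly via
   VEC_UNPACK_FLOAT_LO_EXPR/VEC_UNPACK_FLOAT_HI_EXPR); the reverse
   double-to-int conversion is a NARROW and packs two input vectors into
   one result.  */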
3541 static bool
3542 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3543 gimple **vec_stmt, slp_tree slp_node)
3545 tree vec_dest;
3546 tree scalar_dest;
3547 tree op0, op1 = NULL_TREE;
3548 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3549 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3550 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3551 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3552 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3553 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3554 tree new_temp;
3555 tree def;
3556 gimple *def_stmt;
3557 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3558 gimple *new_stmt = NULL;
3559 stmt_vec_info prev_stmt_info;
3560 int nunits_in;
3561 int nunits_out;
3562 tree vectype_out, vectype_in;
3563 int ncopies, i, j;
3564 tree lhs_type, rhs_type;
3565 enum { NARROW, NONE, WIDEN } modifier;
3566 vec<tree> vec_oprnds0 = vNULL;
3567 vec<tree> vec_oprnds1 = vNULL;
3568 tree vop0;
3569 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3570 vec_info *vinfo = stmt_info->vinfo;
3571 int multi_step_cvt = 0;
3572 vec<tree> vec_dsts = vNULL;
3573 vec<tree> interm_types = vNULL;
3574 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3575 int op_type;
3576 machine_mode rhs_mode;
3577 unsigned short fltsz;
3579 /* Is STMT a vectorizable conversion? */
3581 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3582 return false;
3584 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3585 return false;
3587 if (!is_gimple_assign (stmt))
3588 return false;
3590 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3591 return false;
3593 code = gimple_assign_rhs_code (stmt);
3594 if (!CONVERT_EXPR_CODE_P (code)
3595 && code != FIX_TRUNC_EXPR
3596 && code != FLOAT_EXPR
3597 && code != WIDEN_MULT_EXPR
3598 && code != WIDEN_LSHIFT_EXPR)
3599 return false;
3601 op_type = TREE_CODE_LENGTH (code);
3603 /* Check types of lhs and rhs. */
3604 scalar_dest = gimple_assign_lhs (stmt);
3605 lhs_type = TREE_TYPE (scalar_dest);
3606 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3608 op0 = gimple_assign_rhs1 (stmt);
3609 rhs_type = TREE_TYPE (op0);
3611 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3612 && !((INTEGRAL_TYPE_P (lhs_type)
3613 && INTEGRAL_TYPE_P (rhs_type))
3614 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3615 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3616 return false;
3618 if ((INTEGRAL_TYPE_P (lhs_type)
3619 && (TYPE_PRECISION (lhs_type)
3620 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3621 || (INTEGRAL_TYPE_P (rhs_type)
3622 && (TYPE_PRECISION (rhs_type)
3623 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3625 if (dump_enabled_p ())
3626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3627 "type conversion to/from bit-precision unsupported."
3628 "\n");
3629 return false;
3632 /* Check the operands of the operation. */
3633 if (!vect_is_simple_use_1 (op0, stmt, vinfo,
3634 &def_stmt, &def, &dt[0], &vectype_in))
3636 if (dump_enabled_p ())
3637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3638 "use not simple.\n");
3639 return false;
3641 if (op_type == binary_op)
3643 bool ok;
3645 op1 = gimple_assign_rhs2 (stmt);
3646 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3647 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3648 OP1. */
3649 if (CONSTANT_CLASS_P (op0))
3650 ok = vect_is_simple_use_1 (op1, stmt, vinfo,
3651 &def_stmt, &def, &dt[1], &vectype_in);
3652 else
3653 ok = vect_is_simple_use (op1, stmt, vinfo, &def_stmt,
3654 &def, &dt[1]);
3656 if (!ok)
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3660 "use not simple.\n");
3661 return false;
3665 /* If op0 is an external or constant def, use a vector type of
3666 the same size as the output vector type. */
3667 if (!vectype_in)
3668 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3669 if (vec_stmt)
3670 gcc_assert (vectype_in);
3671 if (!vectype_in)
3673 if (dump_enabled_p ())
3675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3676 "no vectype for scalar type ");
3677 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3678 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3681 return false;
3684 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3685 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3686 if (nunits_in < nunits_out)
3687 modifier = NARROW;
3688 else if (nunits_out == nunits_in)
3689 modifier = NONE;
3690 else
3691 modifier = WIDEN;
3693 /* Multiple types in SLP are handled by creating the appropriate number of
3694 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3695 case of SLP. */
3696 if (slp_node || PURE_SLP_STMT (stmt_info))
3697 ncopies = 1;
3698 else if (modifier == NARROW)
3699 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3700 else
3701 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3703 /* Sanity check: make sure that at least one copy of the vectorized stmt
3704 needs to be generated. */
3705 gcc_assert (ncopies >= 1);
3707 /* Supportable by target? */
3708 switch (modifier)
3710 case NONE:
3711 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3712 return false;
3713 if (supportable_convert_operation (code, vectype_out, vectype_in,
3714 &decl1, &code1))
3715 break;
3716 /* FALLTHRU */
3717 unsupported:
3718 if (dump_enabled_p ())
3719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3720 "conversion not supported by target.\n");
3721 return false;
3723 case WIDEN:
3724 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3725 &code1, &code2, &multi_step_cvt,
3726 &interm_types))
3728 /* Binary widening operation can only be supported directly by the
3729 architecture. */
3730 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3731 break;
3734 if (code != FLOAT_EXPR
3735 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3736 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3737 goto unsupported;
3739 rhs_mode = TYPE_MODE (rhs_type);
3740 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3741 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3742 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3743 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3745 cvt_type
3746 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3747 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3748 if (cvt_type == NULL_TREE)
3749 goto unsupported;
3751 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3753 if (!supportable_convert_operation (code, vectype_out,
3754 cvt_type, &decl1, &codecvt1))
3755 goto unsupported;
3757 else if (!supportable_widening_operation (code, stmt, vectype_out,
3758 cvt_type, &codecvt1,
3759 &codecvt2, &multi_step_cvt,
3760 &interm_types))
3761 continue;
3762 else
3763 gcc_assert (multi_step_cvt == 0);
3765 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3766 vectype_in, &code1, &code2,
3767 &multi_step_cvt, &interm_types))
3768 break;
3771 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3772 goto unsupported;
3774 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3775 codecvt2 = ERROR_MARK;
3776 else
3778 multi_step_cvt++;
3779 interm_types.safe_push (cvt_type);
3780 cvt_type = NULL_TREE;
3782 break;
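  /* A possible outcome of the search above (illustrative only; whether each
     step is supported depends on the target): for a short -> double
     FLOAT_EXPR, FLTSZ is the size of DFmode and the loop first tries
     RHS_MODE = SImode, making CVT_TYPE a 32-bit integer vector type.  If the
     target supports a widening int -> double conversion and a widening
     short -> int NOP, the conversion is done in those two steps; otherwise
     the loop retries with RHS_MODE = DImode, which needs a direct
     long -> double conversion plus a widening short -> long NOP.  */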
3784 case NARROW:
3785 gcc_assert (op_type == unary_op);
3786 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3787 &code1, &multi_step_cvt,
3788 &interm_types))
3789 break;
3791 if (code != FIX_TRUNC_EXPR
3792 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3793 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3794 goto unsupported;
3796 rhs_mode = TYPE_MODE (rhs_type);
3797 cvt_type
3798 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3799 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3800 if (cvt_type == NULL_TREE)
3801 goto unsupported;
3802 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3803 &decl1, &codecvt1))
3804 goto unsupported;
3805 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3806 &code1, &multi_step_cvt,
3807 &interm_types))
3808 break;
3809 goto unsupported;
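  /* Illustrative only (target dependent): for a double -> short
     FIX_TRUNC_EXPR with no direct narrowing support, CVT_TYPE is a 64-bit
     integer vector type, so the conversion becomes a direct double -> long
     fix-trunc followed by a (possibly multi-step) narrowing
     long -> short NOP.  */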
3811 default:
3812 gcc_unreachable ();
3815 if (!vec_stmt) /* transformation not required. */
3817 if (dump_enabled_p ())
3818 dump_printf_loc (MSG_NOTE, vect_location,
3819 "=== vectorizable_conversion ===\n");
3820 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3822 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3823 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3825 else if (modifier == NARROW)
3827 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3828 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3830 else
3832 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3833 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3835 interm_types.release ();
3836 return true;
3839 /** Transform. **/
3840 if (dump_enabled_p ())
3841 dump_printf_loc (MSG_NOTE, vect_location,
3842 "transform conversion. ncopies = %d.\n", ncopies);
3844 if (op_type == binary_op)
3846 if (CONSTANT_CLASS_P (op0))
3847 op0 = fold_convert (TREE_TYPE (op1), op0);
3848 else if (CONSTANT_CLASS_P (op1))
3849 op1 = fold_convert (TREE_TYPE (op0), op1);
3852 /* In case of multi-step conversion, we first generate conversion operations
3853 to the intermediate types, and then from those types to the final one.
3854 We create vector destinations for the intermediate type (TYPES) received
3855 from supportable_*_operation, and store them in the correct order
3856 for future use in vect_create_vectorized_*_stmts (). */
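  /* For example, for a two-step char -> int promotion with a short
     intermediate type, VEC_DSTS ends up as { int dest, short dest }: the
     final destination is pushed first and the intermediate destinations
     follow from the last intermediate type to the first, so the creation
     loops below can simply index it from MULTI_STEP_CVT down to 0.  */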
3857 vec_dsts.create (multi_step_cvt + 1);
3858 vec_dest = vect_create_destination_var (scalar_dest,
3859 (cvt_type && modifier == WIDEN)
3860 ? cvt_type : vectype_out);
3861 vec_dsts.quick_push (vec_dest);
3863 if (multi_step_cvt)
3865 for (i = interm_types.length () - 1;
3866 interm_types.iterate (i, &intermediate_type); i--)
3868 vec_dest = vect_create_destination_var (scalar_dest,
3869 intermediate_type);
3870 vec_dsts.quick_push (vec_dest);
3874 if (cvt_type)
3875 vec_dest = vect_create_destination_var (scalar_dest,
3876 modifier == WIDEN
3877 ? vectype_out : cvt_type);
3879 if (!slp_node)
3881 if (modifier == WIDEN)
3883 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3884 if (op_type == binary_op)
3885 vec_oprnds1.create (1);
3887 else if (modifier == NARROW)
3888 vec_oprnds0.create (
3889 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3891 else if (code == WIDEN_LSHIFT_EXPR)
3892 vec_oprnds1.create (slp_node->vec_stmts_size);
3894 last_oprnd = op0;
3895 prev_stmt_info = NULL;
3896 switch (modifier)
3898 case NONE:
3899 for (j = 0; j < ncopies; j++)
3901 if (j == 0)
3902 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3903 -1);
3904 else
3905 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3907 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3909 /* Arguments are ready. Create the new vector stmt. */
3910 if (code1 == CALL_EXPR)
3912 new_stmt = gimple_build_call (decl1, 1, vop0);
3913 new_temp = make_ssa_name (vec_dest, new_stmt);
3914 gimple_call_set_lhs (new_stmt, new_temp);
3916 else
3918 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3919 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3920 new_temp = make_ssa_name (vec_dest, new_stmt);
3921 gimple_assign_set_lhs (new_stmt, new_temp);
3924 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3925 if (slp_node)
3926 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3927 else
3929 if (!prev_stmt_info)
3930 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3931 else
3932 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3933 prev_stmt_info = vinfo_for_stmt (new_stmt);
3937 break;
3939 case WIDEN:
3940 /* In case the vectorization factor (VF) is bigger than the number
3941 of elements that we can fit in a vectype (nunits), we have to
3942 generate more than one vector stmt - i.e. - we need to "unroll"
3943 the vector stmt by a factor VF/nunits. */
3944 for (j = 0; j < ncopies; j++)
3946 /* Handle uses. */
3947 if (j == 0)
3949 if (slp_node)
3951 if (code == WIDEN_LSHIFT_EXPR)
3953 unsigned int k;
3955 vec_oprnd1 = op1;
3956 /* Store vec_oprnd1 for every vector stmt to be created
3957 for SLP_NODE. We check during the analysis that all
3958 the shift arguments are the same. */
3959 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3960 vec_oprnds1.quick_push (vec_oprnd1);
3962 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3963 slp_node, -1);
3965 else
3966 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3967 &vec_oprnds1, slp_node, -1);
3969 else
3971 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3972 vec_oprnds0.quick_push (vec_oprnd0);
3973 if (op_type == binary_op)
3975 if (code == WIDEN_LSHIFT_EXPR)
3976 vec_oprnd1 = op1;
3977 else
3978 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3979 NULL);
3980 vec_oprnds1.quick_push (vec_oprnd1);
3984 else
3986 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3987 vec_oprnds0.truncate (0);
3988 vec_oprnds0.quick_push (vec_oprnd0);
3989 if (op_type == binary_op)
3991 if (code == WIDEN_LSHIFT_EXPR)
3992 vec_oprnd1 = op1;
3993 else
3994 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3995 vec_oprnd1);
3996 vec_oprnds1.truncate (0);
3997 vec_oprnds1.quick_push (vec_oprnd1);
4001 /* Arguments are ready. Create the new vector stmts. */
4002 for (i = multi_step_cvt; i >= 0; i--)
4004 tree this_dest = vec_dsts[i];
4005 enum tree_code c1 = code1, c2 = code2;
4006 if (i == 0 && codecvt2 != ERROR_MARK)
4008 c1 = codecvt1;
4009 c2 = codecvt2;
4011 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4012 &vec_oprnds1,
4013 stmt, this_dest, gsi,
4014 c1, c2, decl1, decl2,
4015 op_type);
4018 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4020 if (cvt_type)
4022 if (codecvt1 == CALL_EXPR)
4024 new_stmt = gimple_build_call (decl1, 1, vop0);
4025 new_temp = make_ssa_name (vec_dest, new_stmt);
4026 gimple_call_set_lhs (new_stmt, new_temp);
4028 else
4030 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4031 new_temp = make_ssa_name (vec_dest);
4032 new_stmt = gimple_build_assign (new_temp, codecvt1,
4033 vop0);
4036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4038 else
4039 new_stmt = SSA_NAME_DEF_STMT (vop0);
4041 if (slp_node)
4042 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4043 else
4045 if (!prev_stmt_info)
4046 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4047 else
4048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4049 prev_stmt_info = vinfo_for_stmt (new_stmt);
4054 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4055 break;
4057 case NARROW:
4058 /* In case the vectorization factor (VF) is bigger than the number
4059 of elements that we can fit in a vectype (nunits), we have to
4060 generate more than one vector stmt - i.e. - we need to "unroll"
4061 the vector stmt by a factor VF/nunits. */
4062 for (j = 0; j < ncopies; j++)
4064 /* Handle uses. */
4065 if (slp_node)
4066 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4067 slp_node, -1);
4068 else
4070 vec_oprnds0.truncate (0);
4071 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4072 vect_pow2 (multi_step_cvt) - 1);
4075 /* Arguments are ready. Create the new vector stmts. */
4076 if (cvt_type)
4077 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4079 if (codecvt1 == CALL_EXPR)
4081 new_stmt = gimple_build_call (decl1, 1, vop0);
4082 new_temp = make_ssa_name (vec_dest, new_stmt);
4083 gimple_call_set_lhs (new_stmt, new_temp);
4085 else
4087 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4088 new_temp = make_ssa_name (vec_dest);
4089 new_stmt = gimple_build_assign (new_temp, codecvt1,
4090 vop0);
4093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4094 vec_oprnds0[i] = new_temp;
4097 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4098 stmt, vec_dsts, gsi,
4099 slp_node, code1,
4100 &prev_stmt_info);
4103 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4104 break;
4107 vec_oprnds0.release ();
4108 vec_oprnds1.release ();
4109 vec_dsts.release ();
4110 interm_types.release ();
4112 return true;
4116 /* Function vectorizable_assignment.
4118 Check if STMT performs an assignment (copy) that can be vectorized.
4119 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4120 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4121 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4123 static bool
4124 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4125 gimple **vec_stmt, slp_tree slp_node)
4127 tree vec_dest;
4128 tree scalar_dest;
4129 tree op;
4130 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4131 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4132 tree new_temp;
4133 tree def;
4134 gimple *def_stmt;
4135 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4136 int ncopies;
4137 int i, j;
4138 vec<tree> vec_oprnds = vNULL;
4139 tree vop;
4140 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4141 vec_info *vinfo = stmt_info->vinfo;
4142 gimple *new_stmt = NULL;
4143 stmt_vec_info prev_stmt_info = NULL;
4144 enum tree_code code;
4145 tree vectype_in;
4147 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4148 return false;
4150 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4151 return false;
4153 /* Is vectorizable assignment? */
4154 if (!is_gimple_assign (stmt))
4155 return false;
4157 scalar_dest = gimple_assign_lhs (stmt);
4158 if (TREE_CODE (scalar_dest) != SSA_NAME)
4159 return false;
4161 code = gimple_assign_rhs_code (stmt);
4162 if (gimple_assign_single_p (stmt)
4163 || code == PAREN_EXPR
4164 || CONVERT_EXPR_CODE_P (code))
4165 op = gimple_assign_rhs1 (stmt);
4166 else
4167 return false;
4169 if (code == VIEW_CONVERT_EXPR)
4170 op = TREE_OPERAND (op, 0);
4172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4173 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4175 /* Multiple types in SLP are handled by creating the appropriate number of
4176 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4177 case of SLP. */
4178 if (slp_node || PURE_SLP_STMT (stmt_info))
4179 ncopies = 1;
4180 else
4181 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4183 gcc_assert (ncopies >= 1);
4185 if (!vect_is_simple_use_1 (op, stmt, vinfo,
4186 &def_stmt, &def, &dt[0], &vectype_in))
4188 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4190 "use not simple.\n");
4191 return false;
4194 /* We can handle NOP_EXPR conversions that do not change the number
4195 of elements or the vector size. */
4196 if ((CONVERT_EXPR_CODE_P (code)
4197 || code == VIEW_CONVERT_EXPR)
4198 && (!vectype_in
4199 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4200 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4201 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4202 return false;
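  /* For instance (modes are illustrative), an int <-> unsigned int
     conversion or a V4SI <-> V4SF VIEW_CONVERT_EXPR keeps both the number
     of elements and the vector size and can be handled here, while an
     int -> short conversion changes the number of elements per vector and
     is rejected by the test above.  */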
4204 /* We do not handle bit-precision changes. */
4205 if ((CONVERT_EXPR_CODE_P (code)
4206 || code == VIEW_CONVERT_EXPR)
4207 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4208 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4209 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4210 || ((TYPE_PRECISION (TREE_TYPE (op))
4211 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4212 /* But a conversion that does not change the bit-pattern is ok. */
4213 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4214 > TYPE_PRECISION (TREE_TYPE (op)))
4215 && TYPE_UNSIGNED (TREE_TYPE (op))))
4217 if (dump_enabled_p ())
4218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4219 "type conversion to/from bit-precision "
4220 "unsupported.\n");
4221 return false;
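  /* E.g. converting an int to a 3-bit bit-field type (TYPE_PRECISION 3 but
     an 8-bit mode) would need an extra truncation and is rejected, whereas
     widening a 1-bit unsigned bit-field to int leaves the bit pattern
     unchanged and is allowed.  */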
4224 if (!vec_stmt) /* transformation not required. */
4226 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4227 if (dump_enabled_p ())
4228 dump_printf_loc (MSG_NOTE, vect_location,
4229 "=== vectorizable_assignment ===\n");
4230 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4231 return true;
4234 /** Transform. **/
4235 if (dump_enabled_p ())
4236 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4238 /* Handle def. */
4239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4241 /* Handle use. */
4242 for (j = 0; j < ncopies; j++)
4244 /* Handle uses. */
4245 if (j == 0)
4246 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4247 else
4248 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4250 /* Arguments are ready. Create the new vector stmt. */
4251 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4253 if (CONVERT_EXPR_CODE_P (code)
4254 || code == VIEW_CONVERT_EXPR)
4255 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4256 new_stmt = gimple_build_assign (vec_dest, vop);
4257 new_temp = make_ssa_name (vec_dest, new_stmt);
4258 gimple_assign_set_lhs (new_stmt, new_temp);
4259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4260 if (slp_node)
4261 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4264 if (slp_node)
4265 continue;
4267 if (j == 0)
4268 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4269 else
4270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4272 prev_stmt_info = vinfo_for_stmt (new_stmt);
4275 vec_oprnds.release ();
4276 return true;
4280 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4281 either as shift by a scalar or by a vector. */
4283 bool
4284 vect_supportable_shift (enum tree_code code, tree scalar_type)
4287 machine_mode vec_mode;
4288 optab optab;
4289 int icode;
4290 tree vectype;
4292 vectype = get_vectype_for_scalar_type (scalar_type);
4293 if (!vectype)
4294 return false;
4296 optab = optab_for_tree_code (code, vectype, optab_scalar);
4297 if (!optab
4298 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4300 optab = optab_for_tree_code (code, vectype, optab_vector);
4301 if (!optab
4302 || (optab_handler (optab, TYPE_MODE (vectype))
4303 == CODE_FOR_nothing))
4304 return false;
4307 vec_mode = TYPE_MODE (vectype);
4308 icode = (int) optab_handler (optab, vec_mode);
4309 if (icode == CODE_FOR_nothing)
4310 return false;
4312 return true;
4316 /* Function vectorizable_shift.
4318 Check if STMT performs a shift operation that can be vectorized.
4319 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4320 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4321 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4323 static bool
4324 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4325 gimple **vec_stmt, slp_tree slp_node)
4327 tree vec_dest;
4328 tree scalar_dest;
4329 tree op0, op1 = NULL;
4330 tree vec_oprnd1 = NULL_TREE;
4331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4332 tree vectype;
4333 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4334 enum tree_code code;
4335 machine_mode vec_mode;
4336 tree new_temp;
4337 optab optab;
4338 int icode;
4339 machine_mode optab_op2_mode;
4340 tree def;
4341 gimple *def_stmt;
4342 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4343 gimple *new_stmt = NULL;
4344 stmt_vec_info prev_stmt_info;
4345 int nunits_in;
4346 int nunits_out;
4347 tree vectype_out;
4348 tree op1_vectype;
4349 int ncopies;
4350 int j, i;
4351 vec<tree> vec_oprnds0 = vNULL;
4352 vec<tree> vec_oprnds1 = vNULL;
4353 tree vop0, vop1;
4354 unsigned int k;
4355 bool scalar_shift_arg = true;
4356 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4357 vec_info *vinfo = stmt_info->vinfo;
4358 int vf;
4360 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4361 return false;
4363 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4364 return false;
4366 /* Is STMT a vectorizable binary/unary operation? */
4367 if (!is_gimple_assign (stmt))
4368 return false;
4370 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4371 return false;
4373 code = gimple_assign_rhs_code (stmt);
4375 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4376 || code == RROTATE_EXPR))
4377 return false;
4379 scalar_dest = gimple_assign_lhs (stmt);
4380 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4381 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4382 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4384 if (dump_enabled_p ())
4385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4386 "bit-precision shifts not supported.\n");
4387 return false;
4390 op0 = gimple_assign_rhs1 (stmt);
4391 if (!vect_is_simple_use_1 (op0, stmt, vinfo,
4392 &def_stmt, &def, &dt[0], &vectype))
4394 if (dump_enabled_p ())
4395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4396 "use not simple.\n");
4397 return false;
4399 /* If op0 is an external or constant def use a vector type with
4400 the same size as the output vector type. */
4401 if (!vectype)
4402 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4403 if (vec_stmt)
4404 gcc_assert (vectype);
4405 if (!vectype)
4407 if (dump_enabled_p ())
4408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4409 "no vectype for scalar type\n");
4410 return false;
4413 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4414 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4415 if (nunits_out != nunits_in)
4416 return false;
4418 op1 = gimple_assign_rhs2 (stmt);
4419 if (!vect_is_simple_use_1 (op1, stmt, vinfo, &def_stmt,
4420 &def, &dt[1], &op1_vectype))
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4424 "use not simple.\n");
4425 return false;
4428 if (loop_vinfo)
4429 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4430 else
4431 vf = 1;
4433 /* Multiple types in SLP are handled by creating the appropriate number of
4434 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4435 case of SLP. */
4436 if (slp_node || PURE_SLP_STMT (stmt_info))
4437 ncopies = 1;
4438 else
4439 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4441 gcc_assert (ncopies >= 1);
4443 /* Determine whether the shift amount is a vector, or scalar. If the
4444 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4446 if ((dt[1] == vect_internal_def
4447 || dt[1] == vect_induction_def)
4448 && !slp_node)
4449 scalar_shift_arg = false;
4450 else if (dt[1] == vect_constant_def
4451 || dt[1] == vect_external_def
4452 || dt[1] == vect_internal_def)
4454 /* In SLP, we need to check whether the shift count is the same
4455 in all the stmts; in a loop, a constant or invariant count
4456 is always a scalar shift. */
4457 if (slp_node)
4459 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4460 gimple *slpstmt;
4462 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4463 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4464 scalar_shift_arg = false;
4467 else
4469 if (dump_enabled_p ())
4470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4471 "operand mode requires invariant argument.\n");
4472 return false;
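  /* For example:

       x[i] = y[i] << n;     n loop-invariant  -> scalar shift argument
       x[i] = y[i] << z[i];  count varies      -> vector shift argument

     (illustrative scalar code; the classification follows from DT[1]
     above).  */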
4475 /* Vector shifted by vector. */
4476 if (!scalar_shift_arg)
4478 optab = optab_for_tree_code (code, vectype, optab_vector);
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_NOTE, vect_location,
4481 "vector/vector shift/rotate found.\n");
4483 if (!op1_vectype)
4484 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4485 if (op1_vectype == NULL_TREE
4486 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4490 "unusable type for last operand in"
4491 " vector/vector shift/rotate.\n");
4492 return false;
4495 /* See if the machine has a vector shifted by scalar insn and if not
4496 then see if it has a vector shifted by vector insn. */
4497 else
4499 optab = optab_for_tree_code (code, vectype, optab_scalar);
4500 if (optab
4501 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4503 if (dump_enabled_p ())
4504 dump_printf_loc (MSG_NOTE, vect_location,
4505 "vector/scalar shift/rotate found.\n");
4507 else
4509 optab = optab_for_tree_code (code, vectype, optab_vector);
4510 if (optab
4511 && (optab_handler (optab, TYPE_MODE (vectype))
4512 != CODE_FOR_nothing))
4514 scalar_shift_arg = false;
4516 if (dump_enabled_p ())
4517 dump_printf_loc (MSG_NOTE, vect_location,
4518 "vector/vector shift/rotate found.\n");
4520 /* Unlike the other binary operators, shifts/rotates have
4521 an int rhs rather than one of the same type as the lhs,
4522 so make sure the scalar is of the right type if we are
4523 dealing with vectors of long long/long/short/char. */
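  /* For instance, for a V8HI shift the constant count 3 (an int) is folded
     to (short) 3 below, and a non-constant invariant count is converted to
     the element type via vect_init_vector, so that the vector/vector shift
     later sees operands with matching element types (illustration only; the
     mode depends on the target).  */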
4524 if (dt[1] == vect_constant_def)
4525 op1 = fold_convert (TREE_TYPE (vectype), op1);
4526 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4527 TREE_TYPE (op1)))
4529 if (slp_node
4530 && TYPE_MODE (TREE_TYPE (vectype))
4531 != TYPE_MODE (TREE_TYPE (op1)))
4533 if (dump_enabled_p ())
4534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4535 "unusable type for last operand in"
4536 " vector/vector shift/rotate.\n");
4537 return false;
4539 if (vec_stmt && !slp_node)
4541 op1 = fold_convert (TREE_TYPE (vectype), op1);
4542 op1 = vect_init_vector (stmt, op1,
4543 TREE_TYPE (vectype), NULL);
4550 /* Supportable by target? */
4551 if (!optab)
4553 if (dump_enabled_p ())
4554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4555 "no optab.\n");
4556 return false;
4558 vec_mode = TYPE_MODE (vectype);
4559 icode = (int) optab_handler (optab, vec_mode);
4560 if (icode == CODE_FOR_nothing)
4562 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4564 "op not supported by target.\n");
4565 /* Check only during analysis. */
4566 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4567 || (vf < vect_min_worthwhile_factor (code)
4568 && !vec_stmt))
4569 return false;
4570 if (dump_enabled_p ())
4571 dump_printf_loc (MSG_NOTE, vect_location,
4572 "proceeding using word mode.\n");
4575 /* Worthwhile without SIMD support? Check only during analysis. */
4576 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4577 && vf < vect_min_worthwhile_factor (code)
4578 && !vec_stmt)
4580 if (dump_enabled_p ())
4581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4582 "not worthwhile without SIMD support.\n");
4583 return false;
4586 if (!vec_stmt) /* transformation not required. */
4588 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_NOTE, vect_location,
4591 "=== vectorizable_shift ===\n");
4592 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4593 return true;
4596 /** Transform. **/
4598 if (dump_enabled_p ())
4599 dump_printf_loc (MSG_NOTE, vect_location,
4600 "transform binary/unary operation.\n");
4602 /* Handle def. */
4603 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4605 prev_stmt_info = NULL;
4606 for (j = 0; j < ncopies; j++)
4608 /* Handle uses. */
4609 if (j == 0)
4611 if (scalar_shift_arg)
4613 /* Vector shl and shr insn patterns can be defined with scalar
4614 operand 2 (shift operand). In this case, use constant or loop
4615 invariant op1 directly, without extending it to vector mode
4616 first. */
4617 optab_op2_mode = insn_data[icode].operand[2].mode;
4618 if (!VECTOR_MODE_P (optab_op2_mode))
4620 if (dump_enabled_p ())
4621 dump_printf_loc (MSG_NOTE, vect_location,
4622 "operand 1 using scalar mode.\n");
4623 vec_oprnd1 = op1;
4624 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4625 vec_oprnds1.quick_push (vec_oprnd1);
4626 if (slp_node)
4628 /* Store vec_oprnd1 for every vector stmt to be created
4629 for SLP_NODE. We check during the analysis that all
4630 the shift arguments are the same.
4631 TODO: Allow different constants for different vector
4632 stmts generated for an SLP instance. */
4633 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4634 vec_oprnds1.quick_push (vec_oprnd1);
4639 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4640 (a special case for certain kinds of vector shifts); otherwise,
4641 operand 1 should be of a vector type (the usual case). */
4642 if (vec_oprnd1)
4643 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4644 slp_node, -1);
4645 else
4646 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4647 slp_node, -1);
4649 else
4650 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4652 /* Arguments are ready. Create the new vector stmt. */
4653 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4655 vop1 = vec_oprnds1[i];
4656 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4657 new_temp = make_ssa_name (vec_dest, new_stmt);
4658 gimple_assign_set_lhs (new_stmt, new_temp);
4659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4660 if (slp_node)
4661 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4664 if (slp_node)
4665 continue;
4667 if (j == 0)
4668 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4669 else
4670 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4671 prev_stmt_info = vinfo_for_stmt (new_stmt);
4674 vec_oprnds0.release ();
4675 vec_oprnds1.release ();
4677 return true;
4681 /* Function vectorizable_operation.
4683 Check if STMT performs a binary, unary or ternary operation that can
4684 be vectorized.
4685 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4686 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4687 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4689 static bool
4690 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4691 gimple **vec_stmt, slp_tree slp_node)
4693 tree vec_dest;
4694 tree scalar_dest;
4695 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4697 tree vectype;
4698 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4699 enum tree_code code;
4700 machine_mode vec_mode;
4701 tree new_temp;
4702 int op_type;
4703 optab optab;
4704 bool target_support_p;
4705 tree def;
4706 gimple *def_stmt;
4707 enum vect_def_type dt[3]
4708 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4709 gimple *new_stmt = NULL;
4710 stmt_vec_info prev_stmt_info;
4711 int nunits_in;
4712 int nunits_out;
4713 tree vectype_out;
4714 int ncopies;
4715 int j, i;
4716 vec<tree> vec_oprnds0 = vNULL;
4717 vec<tree> vec_oprnds1 = vNULL;
4718 vec<tree> vec_oprnds2 = vNULL;
4719 tree vop0, vop1, vop2;
4720 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4721 vec_info *vinfo = stmt_info->vinfo;
4722 int vf;
4724 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4725 return false;
4727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4728 return false;
4730 /* Is STMT a vectorizable binary/unary operation? */
4731 if (!is_gimple_assign (stmt))
4732 return false;
4734 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4735 return false;
4737 code = gimple_assign_rhs_code (stmt);
4739 /* For pointer addition, we should use the normal plus for
4740 the vector addition. */
4741 if (code == POINTER_PLUS_EXPR)
4742 code = PLUS_EXPR;
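  /* E.g. a pointer increment like q_2 = p_1 + 4 (a POINTER_PLUS_EXPR with a
     sizetype offset) is vectorized as a plain PLUS_EXPR on pointer-sized
     vector elements, since there is no vector form of POINTER_PLUS_EXPR
     (names are illustrative).  */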
4744 /* Support only unary, binary or ternary operations. */
4745 op_type = TREE_CODE_LENGTH (code);
4746 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4748 if (dump_enabled_p ())
4749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4750 "num. args = %d (not unary/binary/ternary op).\n",
4751 op_type);
4752 return false;
4755 scalar_dest = gimple_assign_lhs (stmt);
4756 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4758 /* Most operations cannot handle bit-precision types without extra
4759 truncations. */
4760 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4761 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4762 /* Exception are bitwise binary operations. */
4763 && code != BIT_IOR_EXPR
4764 && code != BIT_XOR_EXPR
4765 && code != BIT_AND_EXPR)
4767 if (dump_enabled_p ())
4768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4769 "bit-precision arithmetic not supported.\n");
4770 return false;
4773 op0 = gimple_assign_rhs1 (stmt);
4774 if (!vect_is_simple_use_1 (op0, stmt, vinfo,
4775 &def_stmt, &def, &dt[0], &vectype))
4777 if (dump_enabled_p ())
4778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4779 "use not simple.\n");
4780 return false;
4782 /* If op0 is an external or constant def use a vector type with
4783 the same size as the output vector type. */
4784 if (!vectype)
4785 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4786 if (vec_stmt)
4787 gcc_assert (vectype);
4788 if (!vectype)
4790 if (dump_enabled_p ())
4792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4793 "no vectype for scalar type ");
4794 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4795 TREE_TYPE (op0));
4796 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4799 return false;
4802 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4803 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4804 if (nunits_out != nunits_in)
4805 return false;
4807 if (op_type == binary_op || op_type == ternary_op)
4809 op1 = gimple_assign_rhs2 (stmt);
4810 if (!vect_is_simple_use (op1, stmt, vinfo, &def_stmt,
4811 &def, &dt[1]))
4813 if (dump_enabled_p ())
4814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4815 "use not simple.\n");
4816 return false;
4819 if (op_type == ternary_op)
4821 op2 = gimple_assign_rhs3 (stmt);
4822 if (!vect_is_simple_use (op2, stmt, vinfo, &def_stmt,
4823 &def, &dt[2]))
4825 if (dump_enabled_p ())
4826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4827 "use not simple.\n");
4828 return false;
4832 if (loop_vinfo)
4833 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4834 else
4835 vf = 1;
4837 /* Multiple types in SLP are handled by creating the appropriate number of
4838 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4839 case of SLP. */
4840 if (slp_node || PURE_SLP_STMT (stmt_info))
4841 ncopies = 1;
4842 else
4843 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4845 gcc_assert (ncopies >= 1);
4847 /* Shifts are handled in vectorizable_shift (). */
4848 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4849 || code == RROTATE_EXPR)
4850 return false;
4852 /* Supportable by target? */
4854 vec_mode = TYPE_MODE (vectype);
4855 if (code == MULT_HIGHPART_EXPR)
4856 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4857 else
4859 optab = optab_for_tree_code (code, vectype, optab_default);
4860 if (!optab)
4862 if (dump_enabled_p ())
4863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4864 "no optab.\n");
4865 return false;
4867 target_support_p = (optab_handler (optab, vec_mode)
4868 != CODE_FOR_nothing);
4871 if (!target_support_p)
4873 if (dump_enabled_p ())
4874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4875 "op not supported by target.\n");
4876 /* Check only during analysis. */
4877 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4878 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4879 return false;
4880 if (dump_enabled_p ())
4881 dump_printf_loc (MSG_NOTE, vect_location,
4882 "proceeding using word mode.\n");
4885 /* Worthwhile without SIMD support? Check only during analysis. */
4886 if (!VECTOR_MODE_P (vec_mode)
4887 && !vec_stmt
4888 && vf < vect_min_worthwhile_factor (code))
4890 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4892 "not worthwhile without SIMD support.\n");
4893 return false;
4896 if (!vec_stmt) /* transformation not required. */
4898 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_NOTE, vect_location,
4901 "=== vectorizable_operation ===\n");
4902 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4903 return true;
4906 /** Transform. **/
4908 if (dump_enabled_p ())
4909 dump_printf_loc (MSG_NOTE, vect_location,
4910 "transform binary/unary operation.\n");
4912 /* Handle def. */
4913 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4915 /* In case the vectorization factor (VF) is bigger than the number
4916 of elements that we can fit in a vectype (nunits), we have to generate
4917 more than one vector stmt - i.e. - we need to "unroll" the
4918 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4919 from one copy of the vector stmt to the next, in the field
4920 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4921 stages to find the correct vector defs to be used when vectorizing
4922 stmts that use the defs of the current stmt. The example below
4923 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4924 we need to create 4 vectorized stmts):
4926 before vectorization:
4927 RELATED_STMT VEC_STMT
4928 S1: x = memref - -
4929 S2: z = x + 1 - -
4931 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4932 there):
4933 RELATED_STMT VEC_STMT
4934 VS1_0: vx0 = memref0 VS1_1 -
4935 VS1_1: vx1 = memref1 VS1_2 -
4936 VS1_2: vx2 = memref2 VS1_3 -
4937 VS1_3: vx3 = memref3 - -
4938 S1: x = load - VS1_0
4939 S2: z = x + 1 - -
4941 step2: vectorize stmt S2 (done here):
4942 To vectorize stmt S2 we first need to find the relevant vector
4943 def for the first operand 'x'. This is, as usual, obtained from
4944 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4945 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4946 relevant vector def 'vx0'. Having found 'vx0' we can generate
4947 the vector stmt VS2_0, and as usual, record it in the
4948 STMT_VINFO_VEC_STMT of stmt S2.
4949 When creating the second copy (VS2_1), we obtain the relevant vector
4950 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4951 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4952 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4953 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4954 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4955 chain of stmts and pointers:
4956 RELATED_STMT VEC_STMT
4957 VS1_0: vx0 = memref0 VS1_1 -
4958 VS1_1: vx1 = memref1 VS1_2 -
4959 VS1_2: vx2 = memref2 VS1_3 -
4960 VS1_3: vx3 = memref3 - -
4961 S1: x = load - VS1_0
4962 VS2_0: vz0 = vx0 + v1 VS2_1 -
4963 VS2_1: vz1 = vx1 + v1 VS2_2 -
4964 VS2_2: vz2 = vx2 + v1 VS2_3 -
4965 VS2_3: vz3 = vx3 + v1 - -
4966 S2: z = x + 1 - VS2_0 */
4968 prev_stmt_info = NULL;
4969 for (j = 0; j < ncopies; j++)
4971 /* Handle uses. */
4972 if (j == 0)
4974 if (op_type == binary_op || op_type == ternary_op)
4975 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4976 slp_node, -1);
4977 else
4978 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4979 slp_node, -1);
4980 if (op_type == ternary_op)
4982 vec_oprnds2.create (1);
4983 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4984 stmt,
4985 NULL));
4988 else
4990 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4991 if (op_type == ternary_op)
4993 tree vec_oprnd = vec_oprnds2.pop ();
4994 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4995 vec_oprnd));
4999 /* Arguments are ready. Create the new vector stmt. */
5000 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5002 vop1 = ((op_type == binary_op || op_type == ternary_op)
5003 ? vec_oprnds1[i] : NULL_TREE);
5004 vop2 = ((op_type == ternary_op)
5005 ? vec_oprnds2[i] : NULL_TREE);
5006 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5007 new_temp = make_ssa_name (vec_dest, new_stmt);
5008 gimple_assign_set_lhs (new_stmt, new_temp);
5009 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5010 if (slp_node)
5011 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5014 if (slp_node)
5015 continue;
5017 if (j == 0)
5018 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5019 else
5020 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5021 prev_stmt_info = vinfo_for_stmt (new_stmt);
5024 vec_oprnds0.release ();
5025 vec_oprnds1.release ();
5026 vec_oprnds2.release ();
5028 return true;
5031 /* A helper function to ensure data reference DR's base alignment
5032 for STMT_INFO. */
5034 static void
5035 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5037 if (!dr->aux)
5038 return;
5040 if (DR_VECT_AUX (dr)->base_misaligned)
5042 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5043 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5045 if (decl_in_symtab_p (base_decl))
5046 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5047 else
5049 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5050 DECL_USER_ALIGN (base_decl) = 1;
5052 DR_VECT_AUX (dr)->base_misaligned = false;
5057 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5058 reversal of the vector elements. If that is impossible to do,
5059 returns NULL. */
5061 static tree
5062 perm_mask_for_reverse (tree vectype)
5064 int i, nunits;
5065 unsigned char *sel;
5067 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5068 sel = XALLOCAVEC (unsigned char, nunits);
5070 for (i = 0; i < nunits; ++i)
5071 sel[i] = nunits - 1 - i;
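  /* E.g. for nunits == 4 this builds the mask { 3, 2, 1, 0 }.  */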
5073 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5074 return NULL_TREE;
5075 return vect_gen_perm_mask_checked (vectype, sel);
5078 /* Function vectorizable_store.
5080 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5081 can be vectorized.
5082 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5083 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5084 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5086 static bool
5087 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5088 slp_tree slp_node)
5090 tree scalar_dest;
5091 tree data_ref;
5092 tree op;
5093 tree vec_oprnd = NULL_TREE;
5094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5095 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5096 tree elem_type;
5097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5098 struct loop *loop = NULL;
5099 machine_mode vec_mode;
5100 tree dummy;
5101 enum dr_alignment_support alignment_support_scheme;
5102 tree def;
5103 gimple *def_stmt;
5104 enum vect_def_type dt;
5105 stmt_vec_info prev_stmt_info = NULL;
5106 tree dataref_ptr = NULL_TREE;
5107 tree dataref_offset = NULL_TREE;
5108 gimple *ptr_incr = NULL;
5109 int ncopies;
5110 int j;
5111 gimple *next_stmt, *first_stmt = NULL;
5112 bool grouped_store = false;
5113 bool store_lanes_p = false;
5114 unsigned int group_size, i;
5115 vec<tree> dr_chain = vNULL;
5116 vec<tree> oprnds = vNULL;
5117 vec<tree> result_chain = vNULL;
5118 bool inv_p;
5119 bool negative = false;
5120 tree offset = NULL_TREE;
5121 vec<tree> vec_oprnds = vNULL;
5122 bool slp = (slp_node != NULL);
5123 unsigned int vec_num;
5124 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5125 vec_info *vinfo = stmt_info->vinfo;
5126 tree aggr_type;
5127 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5128 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5129 int scatter_scale = 1;
5130 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5131 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5132 gimple *new_stmt;
5134 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5135 return false;
5137 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5138 return false;
5140 /* Is vectorizable store? */
5142 if (!is_gimple_assign (stmt))
5143 return false;
5145 scalar_dest = gimple_assign_lhs (stmt);
5146 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5147 && is_pattern_stmt_p (stmt_info))
5148 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5149 if (TREE_CODE (scalar_dest) != ARRAY_REF
5150 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5151 && TREE_CODE (scalar_dest) != INDIRECT_REF
5152 && TREE_CODE (scalar_dest) != COMPONENT_REF
5153 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5154 && TREE_CODE (scalar_dest) != REALPART_EXPR
5155 && TREE_CODE (scalar_dest) != MEM_REF)
5156 return false;
5158 gcc_assert (gimple_assign_single_p (stmt));
5160 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5161 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5163 if (loop_vinfo)
5164 loop = LOOP_VINFO_LOOP (loop_vinfo);
5166 /* Multiple types in SLP are handled by creating the appropriate number of
5167 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5168 case of SLP. */
5169 if (slp || PURE_SLP_STMT (stmt_info))
5170 ncopies = 1;
5171 else
5172 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5174 gcc_assert (ncopies >= 1);
5176 /* FORNOW. This restriction should be relaxed. */
5177 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5179 if (dump_enabled_p ())
5180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5181 "multiple types in nested loop.\n");
5182 return false;
5185 op = gimple_assign_rhs1 (stmt);
5186 if (!vect_is_simple_use (op, stmt, vinfo, &def_stmt,
5187 &def, &dt))
5189 if (dump_enabled_p ())
5190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5191 "use not simple.\n");
5192 return false;
5195 elem_type = TREE_TYPE (vectype);
5196 vec_mode = TYPE_MODE (vectype);
5198 /* FORNOW. In some cases we can vectorize even if the data type is
5199 not supported (e.g. array initialization with 0). */
5200 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5201 return false;
5203 if (!STMT_VINFO_DATA_REF (stmt_info))
5204 return false;
5206 if (!STMT_VINFO_STRIDED_P (stmt_info))
5208 negative =
5209 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5210 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5211 size_zero_node) < 0;
5212 if (negative && ncopies > 1)
5214 if (dump_enabled_p ())
5215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5216 "multiple types with negative step.\n");
5217 return false;
5219 if (negative)
5221 gcc_assert (!grouped_store);
5222 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5223 if (alignment_support_scheme != dr_aligned
5224 && alignment_support_scheme != dr_unaligned_supported)
5226 if (dump_enabled_p ())
5227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5228 "negative step but alignment required.\n");
5229 return false;
5231 if (dt != vect_constant_def
5232 && dt != vect_external_def
5233 && !perm_mask_for_reverse (vectype))
5235 if (dump_enabled_p ())
5236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5237 "negative step and reversing not supported.\n");
5238 return false;
5243 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5245 grouped_store = true;
5246 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5247 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5248 if (!slp
5249 && !PURE_SLP_STMT (stmt_info)
5250 && !STMT_VINFO_STRIDED_P (stmt_info))
5252 if (vect_store_lanes_supported (vectype, group_size))
5253 store_lanes_p = true;
5254 else if (!vect_grouped_store_supported (vectype, group_size))
5255 return false;
5258 if (STMT_VINFO_STRIDED_P (stmt_info)
5259 && (slp || PURE_SLP_STMT (stmt_info))
5260 && (group_size > nunits
5261 || nunits % group_size != 0))
5263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5264 "unhandled strided group store\n");
5265 return false;
5268 if (first_stmt == stmt)
5270 /* STMT is the leader of the group. Check the operands of all the
5271 stmts of the group. */
5272 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5273 while (next_stmt)
5275 gcc_assert (gimple_assign_single_p (next_stmt));
5276 op = gimple_assign_rhs1 (next_stmt);
5277 if (!vect_is_simple_use (op, next_stmt, vinfo,
5278 &def_stmt, &def, &dt))
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5282 "use not simple.\n");
5283 return false;
5285 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5290 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5292 gimple *def_stmt;
5293 tree def;
5294 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5295 &scatter_off, &scatter_scale);
5296 gcc_assert (scatter_decl);
5297 if (!vect_is_simple_use_1 (scatter_off, NULL, vinfo,
5298 &def_stmt, &def, &scatter_idx_dt,
5299 &scatter_off_vectype))
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "scatter index use not simple.");
5304 return false;
5308 if (!vec_stmt) /* transformation not required. */
5310 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5311 /* The SLP costs are calculated during SLP analysis. */
5312 if (!PURE_SLP_STMT (stmt_info))
5313 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5314 NULL, NULL, NULL);
5315 return true;
5318 /** Transform. **/
5320 ensure_base_align (stmt_info, dr);
5322 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5324 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5325 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5326 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5327 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5328 edge pe = loop_preheader_edge (loop);
5329 gimple_seq seq;
5330 basic_block new_bb;
5331 enum { NARROW, NONE, WIDEN } modifier;
5332 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5334 if (nunits == (unsigned int) scatter_off_nunits)
5335 modifier = NONE;
5336 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5338 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5339 modifier = WIDEN;
5341 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5342 sel[i] = i | nunits;
5344 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5345 gcc_assert (perm_mask != NULL_TREE);
5347 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5349 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5350 modifier = NARROW;
5352 for (i = 0; i < (unsigned int) nunits; ++i)
5353 sel[i] = i | scatter_off_nunits;
5355 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5356 gcc_assert (perm_mask != NULL_TREE);
5357 ncopies *= 2;
5359 else
5360 gcc_unreachable ();
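  /* As an illustration (vector modes are hypothetical): for V4DF data and
     V8SI offsets, nunits = 4 and scatter_off_nunits = 8, so MODIFIER is
     WIDEN and sel = { 4, 5, 6, 7, 4, 5, 6, 7 }; the odd copies below use
     this permutation to select the second half of the offset vector.  */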
5362 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5363 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5364 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5365 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5366 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5367 scaletype = TREE_VALUE (arglist);
5369 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5370 && TREE_CODE (rettype) == VOID_TYPE);
5372 ptr = fold_convert (ptrtype, scatter_base);
5373 if (!is_gimple_min_invariant (ptr))
5375 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5376 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5377 gcc_assert (!new_bb);
5380 /* Currently we support only unconditional scatter stores,
5381 so mask should be all ones. */
5382 mask = build_int_cst (masktype, -1);
5383 mask = vect_init_vector (stmt, mask, masktype, NULL);
5385 scale = build_int_cst (scaletype, scatter_scale);
5387 prev_stmt_info = NULL;
5388 for (j = 0; j < ncopies; ++j)
5390 if (j == 0)
5392 src = vec_oprnd1
5393 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt, NULL);
5394 op = vec_oprnd0
5395 = vect_get_vec_def_for_operand (scatter_off, stmt, NULL);
5397 else if (modifier != NONE && (j & 1))
5399 if (modifier == WIDEN)
5401 src = vec_oprnd1
5402 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5403 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5404 stmt, gsi);
5406 else if (modifier == NARROW)
5408 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5409 stmt, gsi);
5410 op = vec_oprnd0
5411 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5413 else
5414 gcc_unreachable ();
5416 else
5418 src = vec_oprnd1
5419 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5420 op = vec_oprnd0
5421 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5424 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5426 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5427 == TYPE_VECTOR_SUBPARTS (srctype));
5428 var = vect_get_new_vect_var (srctype, vect_simple_var, NULL);
5429 var = make_ssa_name (var);
5430 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5431 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5433 src = var;
5436 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5438 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5439 == TYPE_VECTOR_SUBPARTS (idxtype));
5440 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5441 var = make_ssa_name (var);
5442 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5443 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5444 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5445 op = var;
5448 new_stmt
5449 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5451 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5453 if (prev_stmt_info == NULL)
5454 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5455 else
5456 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5457 prev_stmt_info = vinfo_for_stmt (new_stmt);
5459 return true;
5462 if (grouped_store)
5464 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5465 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5467 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5469 /* FORNOW */
5470 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5472 /* We vectorize all the stmts of the interleaving group when we
5473 reach the last stmt in the group. */
5474 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5475 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5476 && !slp)
5478 *vec_stmt = NULL;
5479 return true;
5482 if (slp)
5484 grouped_store = false;
5485 /* VEC_NUM is the number of vect stmts to be created for this
5486 group. */
5487 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5488 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5489 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5490 op = gimple_assign_rhs1 (first_stmt);
5492 else
5493 /* VEC_NUM is the number of vect stmts to be created for this
5494 group. */
5495 vec_num = group_size;
5497 else
5499 first_stmt = stmt;
5500 first_dr = dr;
5501 group_size = vec_num = 1;
5504 if (dump_enabled_p ())
5505 dump_printf_loc (MSG_NOTE, vect_location,
5506 "transform store. ncopies = %d\n", ncopies);
5508 if (STMT_VINFO_STRIDED_P (stmt_info))
5510 gimple_stmt_iterator incr_gsi;
5511 bool insert_after;
5512 gimple *incr;
5513 tree offvar;
5514 tree ivstep;
5515 tree running_off;
5516 gimple_seq stmts = NULL;
5517 tree stride_base, stride_step, alias_off;
5518 tree vec_oprnd;
5519 unsigned int g;
5521 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5523 stride_base
5524 = fold_build_pointer_plus
5525 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5526 size_binop (PLUS_EXPR,
5527 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5528 convert_to_ptrofftype (DR_INIT (first_dr))));
5529 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5531 /* For a store with loop-invariant (but other than power-of-2)
5532 stride (i.e. not a grouped access) like so:
5534 for (i = 0; i < n; i += stride)
5535 array[i] = ...;
5537 we generate a new induction variable and new stores from
5538 the components of the (vectorized) rhs:
5540 for (j = 0; ; j += VF*stride)
5541 vectemp = ...;
5542 tmp1 = vectemp[0];
5543 array[j] = tmp1;
5544 tmp2 = vectemp[1];
5545 array[j + stride] = tmp2;
5549 unsigned nstores = nunits;
5550 tree ltype = elem_type;
5551 if (slp)
5553 nstores = nunits / group_size;
5554 if (group_size < nunits)
5555 ltype = build_vector_type (elem_type, group_size);
5556 else
5557 ltype = vectype;
5558 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5559 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5560 group_size = 1;
5563 ivstep = stride_step;
5564 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5565 build_int_cst (TREE_TYPE (ivstep),
5566 ncopies * nstores));
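  /* For instance (non-SLP, illustrative numbers): with nunits = 4 and
     ncopies = 2, NSTORES is 4 and IVSTEP becomes 8 * STRIDE_STEP, i.e. the
     induction variable advances by VF * stride bytes per vectorized
     iteration, matching the "j += VF*stride" loop sketched above.  */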
5568 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5570 create_iv (stride_base, ivstep, NULL,
5571 loop, &incr_gsi, insert_after,
5572 &offvar, NULL);
5573 incr = gsi_stmt (incr_gsi);
5574 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5576 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5577 if (stmts)
5578 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5580 prev_stmt_info = NULL;
5581 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5582 next_stmt = first_stmt;
5583 for (g = 0; g < group_size; g++)
5585 running_off = offvar;
5586 if (g)
5588 tree size = TYPE_SIZE_UNIT (ltype);
5589 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5590 size);
5591 tree newoff = copy_ssa_name (running_off, NULL);
5592 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5593 running_off, pos);
5594 vect_finish_stmt_generation (stmt, incr, gsi);
5595 running_off = newoff;
5597 for (j = 0; j < ncopies; j++)
5599 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5600 and first_stmt == stmt. */
5601 if (j == 0)
5603 if (slp)
5605 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5606 slp_node, -1);
5607 vec_oprnd = vec_oprnds[0];
5609 else
5611 gcc_assert (gimple_assign_single_p (next_stmt));
5612 op = gimple_assign_rhs1 (next_stmt);
5613 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5614 NULL);
5617 else
5619 if (slp)
5620 vec_oprnd = vec_oprnds[j];
5621 else
5623 vect_is_simple_use (vec_oprnd, NULL, vinfo,
5624 &def_stmt, &def, &dt);
5625 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5629 for (i = 0; i < nstores; i++)
5631 tree newref, newoff;
5632 gimple *incr, *assign;
5633 tree size = TYPE_SIZE (ltype);
5634 /* Extract the i'th component. */
5635 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5636 bitsize_int (i), size);
5637 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5638 size, pos);
5640 elem = force_gimple_operand_gsi (gsi, elem, true,
5641 NULL_TREE, true,
5642 GSI_SAME_STMT);
5644 newref = build2 (MEM_REF, ltype,
5645 running_off, alias_off);
5647 /* And store it to *running_off. */
5648 assign = gimple_build_assign (newref, elem);
5649 vect_finish_stmt_generation (stmt, assign, gsi);
5651 newoff = copy_ssa_name (running_off, NULL);
5652 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5653 running_off, stride_step);
5654 vect_finish_stmt_generation (stmt, incr, gsi);
5656 running_off = newoff;
5657 if (g == group_size - 1
5658 && !slp)
5660 if (j == 0 && i == 0)
5661 STMT_VINFO_VEC_STMT (stmt_info)
5662 = *vec_stmt = assign;
5663 else
5664 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5665 prev_stmt_info = vinfo_for_stmt (assign);
5669 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5671 return true;
5674 dr_chain.create (group_size);
5675 oprnds.create (group_size);
5677 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5678 gcc_assert (alignment_support_scheme);
5679 /* Targets with store-lane instructions must not require explicit
5680 realignment. */
5681 gcc_assert (!store_lanes_p
5682 || alignment_support_scheme == dr_aligned
5683 || alignment_support_scheme == dr_unaligned_supported);
5685 if (negative)
5686 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5688 if (store_lanes_p)
5689 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5690 else
5691 aggr_type = vectype;
5693 /* In case the vectorization factor (VF) is bigger than the number
5694 of elements that we can fit in a vectype (nunits), we have to generate
 5695      more than one vector stmt - i.e., we need to "unroll" the
5696 vector stmt by a factor VF/nunits. For more details see documentation in
 5697      vect_get_vec_def_for_stmt_copy.  */
5699 /* In case of interleaving (non-unit grouped access):
5701 S1: &base + 2 = x2
5702 S2: &base = x0
5703 S3: &base + 1 = x1
5704 S4: &base + 3 = x3
 5706      We create vectorized stores starting from the base address (the access of the
 5707      first stmt in the chain, S2 in the above example), when the last store stmt
5708 of the chain (S4) is reached:
5710 VS1: &base = vx2
5711 VS2: &base + vec_size*1 = vx0
5712 VS3: &base + vec_size*2 = vx1
5713 VS4: &base + vec_size*3 = vx3
5715 Then permutation statements are generated:
5717 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5718 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5721 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5722 (the order of the data-refs in the output of vect_permute_store_chain
5723 corresponds to the order of scalar stmts in the interleaving chain - see
5724 the documentation of vect_permute_store_chain()).
5726 In case of both multiple types and interleaving, above vector stores and
5727 permutation stmts are created for every copy. The result vector stmts are
5728 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
 5729      STMT_VINFO_RELATED_STMT for the next copies.  */
5732 prev_stmt_info = NULL;
5733 for (j = 0; j < ncopies; j++)
5736 if (j == 0)
5738 if (slp)
5740 /* Get vectorized arguments for SLP_NODE. */
5741 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5742 NULL, slp_node, -1);
5744 vec_oprnd = vec_oprnds[0];
5746 else
5748 /* For interleaved stores we collect vectorized defs for all the
5749 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5750 used as an input to vect_permute_store_chain(), and OPRNDS as
5751 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5753 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5754 OPRNDS are of size 1. */
5755 next_stmt = first_stmt;
5756 for (i = 0; i < group_size; i++)
5758 /* Since gaps are not supported for interleaved stores,
5759 GROUP_SIZE is the exact number of stmts in the chain.
 5760 		 Therefore, NEXT_STMT can't be NULL.  In case that
5761 there is no interleaving, GROUP_SIZE is 1, and only one
5762 iteration of the loop will be executed. */
5763 gcc_assert (next_stmt
5764 && gimple_assign_single_p (next_stmt));
5765 op = gimple_assign_rhs1 (next_stmt);
5767 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5768 NULL);
5769 dr_chain.quick_push (vec_oprnd);
5770 oprnds.quick_push (vec_oprnd);
5771 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
 5775 	  /* We should have caught mismatched types earlier.  */
5776 gcc_assert (useless_type_conversion_p (vectype,
5777 TREE_TYPE (vec_oprnd)));
5778 bool simd_lane_access_p
5779 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
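	  /* For a SIMD-lane access to a known local base we can use
	     DR_BASE_ADDRESS directly and keep the running offset as a
	     compile-time constant in DATAREF_OFFSET instead of creating
	     a pointer IV (the else arm below).  */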
5780 if (simd_lane_access_p
5781 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5782 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5783 && integer_zerop (DR_OFFSET (first_dr))
5784 && integer_zerop (DR_INIT (first_dr))
5785 && alias_sets_conflict_p (get_alias_set (aggr_type),
5786 get_alias_set (DR_REF (first_dr))))
5788 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5789 dataref_offset = build_int_cst (reference_alias_ptr_type
5790 (DR_REF (first_dr)), 0);
5791 inv_p = false;
5793 else
5794 dataref_ptr
5795 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5796 simd_lane_access_p ? loop : NULL,
5797 offset, &dummy, gsi, &ptr_incr,
5798 simd_lane_access_p, &inv_p);
5799 gcc_assert (bb_vinfo || !inv_p);
5801 else
5803 /* For interleaved stores we created vectorized defs for all the
5804 defs stored in OPRNDS in the previous iteration (previous copy).
5805 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5806 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5807 next copy.
5808 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5809 OPRNDS are of size 1. */
5810 for (i = 0; i < group_size; i++)
5812 op = oprnds[i];
5813 vect_is_simple_use (op, NULL, vinfo, &def_stmt,
5814 &def, &dt);
5815 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5816 dr_chain[i] = vec_oprnd;
5817 oprnds[i] = vec_oprnd;
5819 if (dataref_offset)
5820 dataref_offset
5821 = int_const_binop (PLUS_EXPR, dataref_offset,
5822 TYPE_SIZE_UNIT (aggr_type));
5823 else
5824 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5825 TYPE_SIZE_UNIT (aggr_type));
5828 if (store_lanes_p)
5830 tree vec_array;
5832 /* Combine all the vectors into an array. */
5833 vec_array = create_vector_array (vectype, vec_num);
5834 for (i = 0; i < vec_num; i++)
5836 vec_oprnd = dr_chain[i];
5837 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5840 /* Emit:
5841 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5842 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5843 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5844 gimple_call_set_lhs (new_stmt, data_ref);
5845 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5847 else
5849 new_stmt = NULL;
5850 if (grouped_store)
5852 if (j == 0)
5853 result_chain.create (group_size);
5854 /* Permute. */
5855 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5856 &result_chain);
5859 next_stmt = first_stmt;
5860 for (i = 0; i < vec_num; i++)
5862 unsigned align, misalign;
5864 if (i > 0)
5865 /* Bump the vector pointer. */
5866 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5867 stmt, NULL_TREE);
5869 if (slp)
5870 vec_oprnd = vec_oprnds[i];
5871 else if (grouped_store)
5872 /* For grouped stores vectorized defs are interleaved in
5873 vect_permute_store_chain(). */
5874 vec_oprnd = result_chain[i];
5876 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5877 dataref_ptr,
5878 dataref_offset
5879 ? dataref_offset
5880 : build_int_cst (reference_alias_ptr_type
5881 (DR_REF (first_dr)), 0));
5882 align = TYPE_ALIGN_UNIT (vectype);
5883 if (aligned_access_p (first_dr))
5884 misalign = 0;
5885 else if (DR_MISALIGNMENT (first_dr) == -1)
5887 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5888 align = TYPE_ALIGN_UNIT (elem_type);
5889 else
5890 align = get_object_alignment (DR_REF (first_dr))
5891 / BITS_PER_UNIT;
5892 misalign = 0;
5893 TREE_TYPE (data_ref)
5894 = build_aligned_type (TREE_TYPE (data_ref),
5895 align * BITS_PER_UNIT);
5897 else
5899 TREE_TYPE (data_ref)
5900 = build_aligned_type (TREE_TYPE (data_ref),
5901 TYPE_ALIGN (elem_type));
5902 misalign = DR_MISALIGNMENT (first_dr);
5904 if (dataref_offset == NULL_TREE
5905 && TREE_CODE (dataref_ptr) == SSA_NAME)
5906 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5907 misalign);
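	  /* For a negative-step access the vector elements have to appear
	     in reverse memory order, so a non-invariant VEC_OPRND is first
	     permuted with the reversing mask.  */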
5909 if (negative
5910 && dt != vect_constant_def
5911 && dt != vect_external_def)
5913 tree perm_mask = perm_mask_for_reverse (vectype);
5914 tree perm_dest
5915 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5916 vectype);
5917 tree new_temp = make_ssa_name (perm_dest);
5919 /* Generate the permute statement. */
5920 gimple *perm_stmt
5921 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5922 vec_oprnd, perm_mask);
5923 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5925 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5926 vec_oprnd = new_temp;
5929 /* Arguments are ready. Create the new vector stmt. */
5930 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5933 if (slp)
5934 continue;
5936 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5937 if (!next_stmt)
5938 break;
5941 if (!slp)
5943 if (j == 0)
5944 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5945 else
5946 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5947 prev_stmt_info = vinfo_for_stmt (new_stmt);
5951 dr_chain.release ();
5952 oprnds.release ();
5953 result_chain.release ();
5954 vec_oprnds.release ();
5956 return true;
5959 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5960 VECTOR_CST mask. No checks are made that the target platform supports the
5961 mask, so callers may wish to test can_vec_perm_p separately, or use
5962 vect_gen_perm_mask_checked. */
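 /* For example (illustrative only), for a 4-element vector a reversing
    SEL of {3, 2, 1, 0} yields the VECTOR_CST {3, 2, 1, 0} of the
    corresponding integer vector type.  */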
5964 tree
5965 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5967 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5968 int i, nunits;
5970 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5972 mask_elt_type = lang_hooks.types.type_for_mode
5973 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5974 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5976 mask_elts = XALLOCAVEC (tree, nunits);
5977 for (i = nunits - 1; i >= 0; i--)
5978 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5979 mask_vec = build_vector (mask_type, mask_elts);
5981 return mask_vec;
5984 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5985 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5987 tree
5988 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5990 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5991 return vect_gen_perm_mask_any (vectype, sel);
 5994 /* Given vector variables X and Y that were generated for the scalar
5995 STMT, generate instructions to permute the vector elements of X and Y
5996 using permutation mask MASK_VEC, insert them at *GSI and return the
5997 permuted vector variable. */
5999 static tree
6000 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6001 gimple_stmt_iterator *gsi)
6003 tree vectype = TREE_TYPE (x);
6004 tree perm_dest, data_ref;
6005 gimple *perm_stmt;
6007 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6008 data_ref = make_ssa_name (perm_dest);
6010 /* Generate the permute statement. */
6011 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6012 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6014 return data_ref;
6017 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6018    inserting them on the loop's preheader edge.  Returns true if we
 6019    were successful in doing so (and thus STMT can then be moved),
6020 otherwise returns false. */
6022 static bool
6023 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6025 ssa_op_iter i;
6026 tree op;
6027 bool any = false;
6029 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6031 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6032 if (!gimple_nop_p (def_stmt)
6033 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6035 /* Make sure we don't need to recurse. While we could do
 6036 	     so in simple cases, when there are more complex use webs
6037 we don't have an easy way to preserve stmt order to fulfil
6038 dependencies within them. */
6039 tree op2;
6040 ssa_op_iter i2;
6041 if (gimple_code (def_stmt) == GIMPLE_PHI)
6042 return false;
6043 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6045 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6046 if (!gimple_nop_p (def_stmt2)
6047 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6048 return false;
6050 any = true;
6054 if (!any)
6055 return true;
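   /* All defs are movable; now actually hoist them onto the
      preheader edge.  */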
6057 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6059 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6060 if (!gimple_nop_p (def_stmt)
6061 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6063 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6064 gsi_remove (&gsi, false);
6065 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6069 return true;
6072 /* vectorizable_load.
 6074    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6075 can be vectorized.
6076 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 6077    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6078 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6080 static bool
6081 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6082 slp_tree slp_node, slp_instance slp_node_instance)
6084 tree scalar_dest;
6085 tree vec_dest = NULL;
6086 tree data_ref = NULL;
6087 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6088 stmt_vec_info prev_stmt_info;
6089 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6090 struct loop *loop = NULL;
6091 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6092 bool nested_in_vect_loop = false;
6093 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6094 tree elem_type;
6095 tree new_temp;
6096 machine_mode mode;
6097 gimple *new_stmt = NULL;
6098 tree dummy;
6099 enum dr_alignment_support alignment_support_scheme;
6100 tree dataref_ptr = NULL_TREE;
6101 tree dataref_offset = NULL_TREE;
6102 gimple *ptr_incr = NULL;
6103 int ncopies;
6104 int i, j, group_size = -1, group_gap_adj;
6105 tree msq = NULL_TREE, lsq;
6106 tree offset = NULL_TREE;
6107 tree byte_offset = NULL_TREE;
6108 tree realignment_token = NULL_TREE;
6109 gphi *phi = NULL;
6110 vec<tree> dr_chain = vNULL;
6111 bool grouped_load = false;
6112 bool load_lanes_p = false;
6113 gimple *first_stmt;
6114 bool inv_p;
6115 bool negative = false;
6116 bool compute_in_loop = false;
6117 struct loop *at_loop;
6118 int vec_num;
6119 bool slp = (slp_node != NULL);
6120 bool slp_perm = false;
6121 enum tree_code code;
6122 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6123 int vf;
6124 tree aggr_type;
6125 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6126 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6127 int gather_scale = 1;
6128 enum vect_def_type gather_dt = vect_unknown_def_type;
6129 vec_info *vinfo = stmt_info->vinfo;
6131 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6132 return false;
6134 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
6135 return false;
6137 /* Is vectorizable load? */
6138 if (!is_gimple_assign (stmt))
6139 return false;
6141 scalar_dest = gimple_assign_lhs (stmt);
6142 if (TREE_CODE (scalar_dest) != SSA_NAME)
6143 return false;
6145 code = gimple_assign_rhs_code (stmt);
6146 if (code != ARRAY_REF
6147 && code != BIT_FIELD_REF
6148 && code != INDIRECT_REF
6149 && code != COMPONENT_REF
6150 && code != IMAGPART_EXPR
6151 && code != REALPART_EXPR
6152 && code != MEM_REF
6153 && TREE_CODE_CLASS (code) != tcc_declaration)
6154 return false;
6156 if (!STMT_VINFO_DATA_REF (stmt_info))
6157 return false;
6159 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6160 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6162 if (loop_vinfo)
6164 loop = LOOP_VINFO_LOOP (loop_vinfo);
6165 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6166 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6168 else
6169 vf = 1;
6171 /* Multiple types in SLP are handled by creating the appropriate number of
6172 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6173 case of SLP. */
6174 if (slp || PURE_SLP_STMT (stmt_info))
6175 ncopies = 1;
6176 else
6177 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6179 gcc_assert (ncopies >= 1);
6181 /* FORNOW. This restriction should be relaxed. */
6182 if (nested_in_vect_loop && ncopies > 1)
6184 if (dump_enabled_p ())
6185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6186 "multiple types in nested loop.\n");
6187 return false;
6190 /* Invalidate assumptions made by dependence analysis when vectorization
6191 on the unrolled body effectively re-orders stmts. */
6192 if (ncopies > 1
6193 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6194 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6195 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6199 "cannot perform implicit CSE when unrolling "
6200 "with negative dependence distance\n");
6201 return false;
6204 elem_type = TREE_TYPE (vectype);
6205 mode = TYPE_MODE (vectype);
6207 /* FORNOW. In some cases can vectorize even if data-type not supported
6208 (e.g. - data copies). */
6209 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6211 if (dump_enabled_p ())
6212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6213 "Aligned load, but unsupported type.\n");
6214 return false;
6217 /* Check if the load is a part of an interleaving chain. */
6218 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6220 grouped_load = true;
6221 /* FORNOW */
6222 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6224 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6226 /* If this is single-element interleaving with an element distance
 6227 	 that leaves unused vector loads around, punt - we at least create
6228 very sub-optimal code in that case (and blow up memory,
6229 see PR65518). */
6230 if (first_stmt == stmt
6231 && !GROUP_NEXT_ELEMENT (stmt_info)
6232 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6234 if (dump_enabled_p ())
6235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6236 "single-element interleaving not supported "
6237 "for not adjacent vector loads\n");
6238 return false;
6241 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6242 slp_perm = true;
6244 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6245 if (!slp
6246 && !PURE_SLP_STMT (stmt_info)
6247 && !STMT_VINFO_STRIDED_P (stmt_info))
6249 if (vect_load_lanes_supported (vectype, group_size))
6250 load_lanes_p = true;
6251 else if (!vect_grouped_load_supported (vectype, group_size))
6252 return false;
6255 /* Invalidate assumptions made by dependence analysis when vectorization
6256 on the unrolled body effectively re-orders stmts. */
6257 if (!PURE_SLP_STMT (stmt_info)
6258 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6259 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6260 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6264 "cannot perform implicit CSE when performing "
6265 "group loads with negative dependence distance\n");
6266 return false;
6269 /* Similarly when the stmt is a load that is both part of a SLP
6270 instance and a loop vectorized stmt via the same-dr mechanism
6271 we have to give up. */
6272 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6273 && (STMT_SLP_TYPE (stmt_info)
6274 != STMT_SLP_TYPE (vinfo_for_stmt
6275 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6277 if (dump_enabled_p ())
6278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6279 "conflicting SLP types for CSEd load\n");
6280 return false;
6285 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6287 gimple *def_stmt;
6288 tree def;
6289 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6290 &gather_off, &gather_scale);
6291 gcc_assert (gather_decl);
6292 if (!vect_is_simple_use_1 (gather_off, NULL, vinfo,
6293 &def_stmt, &def, &gather_dt,
6294 &gather_off_vectype))
6296 if (dump_enabled_p ())
6297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6298 "gather index use not simple.\n");
6299 return false;
6302 else if (STMT_VINFO_STRIDED_P (stmt_info))
6304 if ((grouped_load
6305 && (slp || PURE_SLP_STMT (stmt_info)))
6306 && (group_size > nunits
6307 || nunits % group_size != 0))
6309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6310 "unhandled strided group load\n");
6311 return false;
6314 else
6316 negative = tree_int_cst_compare (nested_in_vect_loop
6317 ? STMT_VINFO_DR_STEP (stmt_info)
6318 : DR_STEP (dr),
6319 size_zero_node) < 0;
6320 if (negative && ncopies > 1)
6322 if (dump_enabled_p ())
6323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6324 "multiple types with negative step.\n");
6325 return false;
6328 if (negative)
6330 if (grouped_load)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6334 "negative step for group load not supported"
6335 "\n");
6336 return false;
6338 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6339 if (alignment_support_scheme != dr_aligned
6340 && alignment_support_scheme != dr_unaligned_supported)
6342 if (dump_enabled_p ())
6343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6344 "negative step but alignment required.\n");
6345 return false;
6347 if (!perm_mask_for_reverse (vectype))
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6351 "negative step and reversing not supported."
6352 "\n");
6353 return false;
6358 if (!vec_stmt) /* transformation not required. */
6360 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6361 /* The SLP costs are calculated during SLP analysis. */
6362 if (!PURE_SLP_STMT (stmt_info))
6363 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6364 NULL, NULL, NULL);
6365 return true;
6368 if (dump_enabled_p ())
6369 dump_printf_loc (MSG_NOTE, vect_location,
6370 "transform load. ncopies = %d\n", ncopies);
6372 /** Transform. **/
6374 ensure_base_align (stmt_info, dr);
6376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6378 tree vec_oprnd0 = NULL_TREE, op;
6379 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6380 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6381 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6382 edge pe = loop_preheader_edge (loop);
6383 gimple_seq seq;
6384 basic_block new_bb;
6385 enum { NARROW, NONE, WIDEN } modifier;
6386 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6388 if (nunits == gather_off_nunits)
6389 modifier = NONE;
6390 else if (nunits == gather_off_nunits / 2)
6392 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6393 modifier = WIDEN;
6395 for (i = 0; i < gather_off_nunits; ++i)
6396 sel[i] = i | nunits;
6398 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6400 else if (nunits == gather_off_nunits * 2)
6402 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6403 modifier = NARROW;
6405 for (i = 0; i < nunits; ++i)
6406 sel[i] = i < gather_off_nunits
6407 ? i : i + nunits - gather_off_nunits;
6409 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6410 ncopies *= 2;
6412 else
6413 gcc_unreachable ();
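       /* In other words (restating the cases above): WIDEN - one
	  gather-offset vector covers two data vectors, so odd copies
	  select the high half of the offsets via PERM_MASK; NARROW -
	  one data vector needs two gather calls, so NCOPIES is doubled
	  and pairs of results are merged via PERM_MASK below.  */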
6415 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6416 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6417 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6418 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6419 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6420 scaletype = TREE_VALUE (arglist);
6421 gcc_checking_assert (types_compatible_p (srctype, rettype));
6423 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6425 ptr = fold_convert (ptrtype, gather_base);
6426 if (!is_gimple_min_invariant (ptr))
6428 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6429 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6430 gcc_assert (!new_bb);
6433 /* Currently we support only unconditional gather loads,
6434 so mask should be all ones. */
6435 if (TREE_CODE (masktype) == INTEGER_TYPE)
6436 mask = build_int_cst (masktype, -1);
6437 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6439 mask = build_int_cst (TREE_TYPE (masktype), -1);
6440 mask = build_vector_from_val (masktype, mask);
6441 mask = vect_init_vector (stmt, mask, masktype, NULL);
6443 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6445 REAL_VALUE_TYPE r;
6446 long tmp[6];
6447 for (j = 0; j < 6; ++j)
6448 tmp[j] = -1;
6449 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6450 mask = build_real (TREE_TYPE (masktype), r);
6451 mask = build_vector_from_val (masktype, mask);
6452 mask = vect_init_vector (stmt, mask, masktype, NULL);
6454 else
6455 gcc_unreachable ();
6457 scale = build_int_cst (scaletype, gather_scale);
6459 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6460 merge = build_int_cst (TREE_TYPE (rettype), 0);
6461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6463 REAL_VALUE_TYPE r;
6464 long tmp[6];
6465 for (j = 0; j < 6; ++j)
6466 tmp[j] = 0;
6467 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6468 merge = build_real (TREE_TYPE (rettype), r);
6470 else
6471 gcc_unreachable ();
6472 merge = build_vector_from_val (rettype, merge);
6473 merge = vect_init_vector (stmt, merge, rettype, NULL);
6475 prev_stmt_info = NULL;
6476 for (j = 0; j < ncopies; ++j)
6478 if (modifier == WIDEN && (j & 1))
6479 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6480 perm_mask, stmt, gsi);
6481 else if (j == 0)
6482 op = vec_oprnd0
6483 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6484 else
6485 op = vec_oprnd0
6486 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6488 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6490 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6491 == TYPE_VECTOR_SUBPARTS (idxtype));
6492 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6493 var = make_ssa_name (var);
6494 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6495 new_stmt
6496 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6497 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6498 op = var;
6501 new_stmt
6502 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6504 if (!useless_type_conversion_p (vectype, rettype))
6506 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6507 == TYPE_VECTOR_SUBPARTS (rettype));
6508 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6509 op = make_ssa_name (var, new_stmt);
6510 gimple_call_set_lhs (new_stmt, op);
6511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6512 var = make_ssa_name (vec_dest);
6513 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6514 new_stmt
6515 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6517 else
6519 var = make_ssa_name (vec_dest, new_stmt);
6520 gimple_call_set_lhs (new_stmt, var);
6523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6525 if (modifier == NARROW)
6527 if ((j & 1) == 0)
6529 prev_res = var;
6530 continue;
6532 var = permute_vec_elements (prev_res, var,
6533 perm_mask, stmt, gsi);
6534 new_stmt = SSA_NAME_DEF_STMT (var);
6537 if (prev_stmt_info == NULL)
6538 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6539 else
6540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6541 prev_stmt_info = vinfo_for_stmt (new_stmt);
6543 return true;
6545 else if (STMT_VINFO_STRIDED_P (stmt_info))
6547 gimple_stmt_iterator incr_gsi;
6548 bool insert_after;
6549 gimple *incr;
6550 tree offvar;
6551 tree ivstep;
6552 tree running_off;
6553 vec<constructor_elt, va_gc> *v = NULL;
6554 gimple_seq stmts = NULL;
6555 tree stride_base, stride_step, alias_off;
6557 gcc_assert (!nested_in_vect_loop);
6559 if (slp && grouped_load)
6560 first_dr = STMT_VINFO_DATA_REF
6561 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6562 else
6563 first_dr = dr;
6565 stride_base
6566 = fold_build_pointer_plus
6567 (DR_BASE_ADDRESS (first_dr),
6568 size_binop (PLUS_EXPR,
6569 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6570 convert_to_ptrofftype (DR_INIT (first_dr))));
6571 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6573 /* For a load with loop-invariant (but other than power-of-2)
6574 stride (i.e. not a grouped access) like so:
6576 for (i = 0; i < n; i += stride)
6577 ... = array[i];
6579 we generate a new induction variable and new accesses to
6580 form a new vector (or vectors, depending on ncopies):
6582 for (j = 0; ; j += VF*stride)
6583 tmp1 = array[j];
6584 tmp2 = array[j + stride];
 6586 	    vectemp = {tmp1, tmp2, ...}  */
6589 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6590 build_int_cst (TREE_TYPE (stride_step), vf));
6592 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6594 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6595 loop, &incr_gsi, insert_after,
6596 &offvar, NULL);
6597 incr = gsi_stmt (incr_gsi);
6598 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6600 stride_step = force_gimple_operand (unshare_expr (stride_step),
6601 &stmts, true, NULL_TREE);
6602 if (stmts)
6603 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6605 prev_stmt_info = NULL;
6606 running_off = offvar;
6607 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6608 int nloads = nunits;
6609 tree ltype = TREE_TYPE (vectype);
6610 auto_vec<tree> dr_chain;
6611 if (slp)
6613 nloads = nunits / group_size;
6614 if (group_size < nunits)
6615 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6616 else
6617 ltype = vectype;
6618 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6619 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6620 if (slp_perm)
6621 dr_chain.create (ncopies);
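      /* Each vector result below is assembled from NLOADS strided loads
	 of LTYPE: single scalar elements in the non-SLP case,
	 GROUP_SIZE-element sub-vectors for SLP.  */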
6623 for (j = 0; j < ncopies; j++)
6625 tree vec_inv;
6627 if (nloads > 1)
6629 vec_alloc (v, nloads);
6630 for (i = 0; i < nloads; i++)
6632 tree newref, newoff;
6633 gimple *incr;
6634 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6636 newref = force_gimple_operand_gsi (gsi, newref, true,
6637 NULL_TREE, true,
6638 GSI_SAME_STMT);
6639 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6640 newoff = copy_ssa_name (running_off);
6641 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6642 running_off, stride_step);
6643 vect_finish_stmt_generation (stmt, incr, gsi);
6645 running_off = newoff;
6648 vec_inv = build_constructor (vectype, v);
6649 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6650 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6652 else
6654 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6655 build2 (MEM_REF, ltype,
6656 running_off, alias_off));
6657 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6659 tree newoff = copy_ssa_name (running_off);
6660 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6661 running_off, stride_step);
6662 vect_finish_stmt_generation (stmt, incr, gsi);
6664 running_off = newoff;
6667 if (slp)
6669 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6670 if (slp_perm)
6671 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6673 else
6675 if (j == 0)
6676 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6677 else
6678 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6679 prev_stmt_info = vinfo_for_stmt (new_stmt);
6682 if (slp_perm)
6683 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6684 slp_node_instance, false);
6685 return true;
6688 if (grouped_load)
6690 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6691 if (slp
6692 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6693 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6694 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6696 /* Check if the chain of loads is already vectorized. */
6697 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6698 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6699 ??? But we can only do so if there is exactly one
6700 as we have no way to get at the rest. Leave the CSE
6701 opportunity alone.
6702 ??? With the group load eventually participating
6703 in multiple different permutations (having multiple
6704 slp nodes which refer to the same group) the CSE
6705 is even wrong code. See PR56270. */
6706 && !slp)
6708 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6709 return true;
6711 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6712 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6713 group_gap_adj = 0;
6715 /* VEC_NUM is the number of vect stmts to be created for this group. */
6716 if (slp)
6718 grouped_load = false;
6719 /* For SLP permutation support we need to load the whole group,
6720 not only the number of vector stmts the permutation result
6721 fits in. */
6722 if (slp_perm)
6723 vec_num = (group_size * vf + nunits - 1) / nunits;
6724 else
6725 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6726 group_gap_adj = vf * group_size - nunits * vec_num;
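	  /* GROUP_GAP_ADJ is the number of scalar elements per vectorized
	     iteration that the created vector stmts do not cover; the
	     data-ref pointer is bumped over them further down.  */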
6728 else
6729 vec_num = group_size;
6731 else
6733 first_stmt = stmt;
6734 first_dr = dr;
6735 group_size = vec_num = 1;
6736 group_gap_adj = 0;
6739 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6740 gcc_assert (alignment_support_scheme);
6741 /* Targets with load-lane instructions must not require explicit
6742 realignment. */
6743 gcc_assert (!load_lanes_p
6744 || alignment_support_scheme == dr_aligned
6745 || alignment_support_scheme == dr_unaligned_supported);
6747 /* In case the vectorization factor (VF) is bigger than the number
6748 of elements that we can fit in a vectype (nunits), we have to generate
6749 more than one vector stmt - i.e - we need to "unroll" the
6750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6751 from one copy of the vector stmt to the next, in the field
6752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6753 stages to find the correct vector defs to be used when vectorizing
6754 stmts that use the defs of the current stmt. The example below
6755 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6756 need to create 4 vectorized stmts):
6758 before vectorization:
6759 RELATED_STMT VEC_STMT
6760 S1: x = memref - -
6761 S2: z = x + 1 - -
6763 step 1: vectorize stmt S1:
6764 We first create the vector stmt VS1_0, and, as usual, record a
6765 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6766 Next, we create the vector stmt VS1_1, and record a pointer to
6767 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6768 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6769 stmts and pointers:
6770 RELATED_STMT VEC_STMT
6771 VS1_0: vx0 = memref0 VS1_1 -
6772 VS1_1: vx1 = memref1 VS1_2 -
6773 VS1_2: vx2 = memref2 VS1_3 -
6774 VS1_3: vx3 = memref3 - -
6775 S1: x = load - VS1_0
6776 S2: z = x + 1 - -
6778 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6779 information we recorded in RELATED_STMT field is used to vectorize
6780 stmt S2. */
6782 /* In case of interleaving (non-unit grouped access):
6784 S1: x2 = &base + 2
6785 S2: x0 = &base
6786 S3: x1 = &base + 1
6787 S4: x3 = &base + 3
6789 Vectorized loads are created in the order of memory accesses
6790 starting from the access of the first stmt of the chain:
6792 VS1: vx0 = &base
6793 VS2: vx1 = &base + vec_size*1
6794 VS3: vx3 = &base + vec_size*2
6795 VS4: vx4 = &base + vec_size*3
6797 Then permutation statements are generated:
6799 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6800 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6803 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6804 (the order of the data-refs in the output of vect_permute_load_chain
6805 corresponds to the order of scalar stmts in the interleaving chain - see
6806 the documentation of vect_permute_load_chain()).
6807 The generation of permutation stmts and recording them in
6808 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6810 In case of both multiple types and interleaving, the vector loads and
6811 permutation stmts above are created for every copy. The result vector
6812 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6813 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6815 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6816 on a target that supports unaligned accesses (dr_unaligned_supported)
6817 we generate the following code:
6818 p = initial_addr;
6819 indx = 0;
6820 loop {
6821 p = p + indx * vectype_size;
6822 vec_dest = *(p);
6823 indx = indx + 1;
6826 Otherwise, the data reference is potentially unaligned on a target that
6827 does not support unaligned accesses (dr_explicit_realign_optimized) -
6828 then generate the following code, in which the data in each iteration is
6829 obtained by two vector loads, one from the previous iteration, and one
6830 from the current iteration:
6831 p1 = initial_addr;
6832 msq_init = *(floor(p1))
6833 p2 = initial_addr + VS - 1;
6834 realignment_token = call target_builtin;
6835 indx = 0;
6836 loop {
6837 p2 = p2 + indx * vectype_size
6838 lsq = *(floor(p2))
6839 vec_dest = realign_load (msq, lsq, realignment_token)
6840 indx = indx + 1;
6841 msq = lsq;
6842 } */
6844 /* If the misalignment remains the same throughout the execution of the
6845 loop, we can create the init_addr and permutation mask at the loop
6846 preheader. Otherwise, it needs to be created inside the loop.
6847 This can only occur when vectorizing memory accesses in the inner-loop
6848 nested within an outer-loop that is being vectorized. */
6850 if (nested_in_vect_loop
6851 && (TREE_INT_CST_LOW (DR_STEP (dr))
6852 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6854 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6855 compute_in_loop = true;
6858 if ((alignment_support_scheme == dr_explicit_realign_optimized
6859 || alignment_support_scheme == dr_explicit_realign)
6860 && !compute_in_loop)
6862 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6863 alignment_support_scheme, NULL_TREE,
6864 &at_loop);
6865 if (alignment_support_scheme == dr_explicit_realign_optimized)
6867 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6868 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6869 size_one_node);
6872 else
6873 at_loop = loop;
6875 if (negative)
6876 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6878 if (load_lanes_p)
6879 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6880 else
6881 aggr_type = vectype;
6883 prev_stmt_info = NULL;
6884 for (j = 0; j < ncopies; j++)
6886 /* 1. Create the vector or array pointer update chain. */
6887 if (j == 0)
6889 bool simd_lane_access_p
6890 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6891 if (simd_lane_access_p
6892 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6893 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6894 && integer_zerop (DR_OFFSET (first_dr))
6895 && integer_zerop (DR_INIT (first_dr))
6896 && alias_sets_conflict_p (get_alias_set (aggr_type),
6897 get_alias_set (DR_REF (first_dr)))
6898 && (alignment_support_scheme == dr_aligned
6899 || alignment_support_scheme == dr_unaligned_supported))
6901 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6902 dataref_offset = build_int_cst (reference_alias_ptr_type
6903 (DR_REF (first_dr)), 0);
6904 inv_p = false;
6906 else
6907 dataref_ptr
6908 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6909 offset, &dummy, gsi, &ptr_incr,
6910 simd_lane_access_p, &inv_p,
6911 byte_offset);
6913 else if (dataref_offset)
6914 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6915 TYPE_SIZE_UNIT (aggr_type));
6916 else
6917 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6918 TYPE_SIZE_UNIT (aggr_type));
6920 if (grouped_load || slp_perm)
6921 dr_chain.create (vec_num);
6923 if (load_lanes_p)
6925 tree vec_array;
6927 vec_array = create_vector_array (vectype, vec_num);
6929 /* Emit:
6930 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6931 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6932 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6933 gimple_call_set_lhs (new_stmt, vec_array);
6934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6936 /* Extract each vector into an SSA_NAME. */
6937 for (i = 0; i < vec_num; i++)
6939 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6940 vec_array, i);
6941 dr_chain.quick_push (new_temp);
6944 /* Record the mapping between SSA_NAMEs and statements. */
6945 vect_record_grouped_load_vectors (stmt, dr_chain);
6947 else
6949 for (i = 0; i < vec_num; i++)
6951 if (i > 0)
6952 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6953 stmt, NULL_TREE);
6955 /* 2. Create the vector-load in the loop. */
6956 switch (alignment_support_scheme)
6958 case dr_aligned:
6959 case dr_unaligned_supported:
6961 unsigned int align, misalign;
6963 data_ref
6964 = fold_build2 (MEM_REF, vectype, dataref_ptr,
6965 dataref_offset
6966 ? dataref_offset
6967 : build_int_cst (reference_alias_ptr_type
6968 (DR_REF (first_dr)), 0));
6969 align = TYPE_ALIGN_UNIT (vectype);
6970 if (alignment_support_scheme == dr_aligned)
6972 gcc_assert (aligned_access_p (first_dr));
6973 misalign = 0;
6975 else if (DR_MISALIGNMENT (first_dr) == -1)
6977 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6978 align = TYPE_ALIGN_UNIT (elem_type);
6979 else
6980 align = (get_object_alignment (DR_REF (first_dr))
6981 / BITS_PER_UNIT);
6982 misalign = 0;
6983 TREE_TYPE (data_ref)
6984 = build_aligned_type (TREE_TYPE (data_ref),
6985 align * BITS_PER_UNIT);
6987 else
6989 TREE_TYPE (data_ref)
6990 = build_aligned_type (TREE_TYPE (data_ref),
6991 TYPE_ALIGN (elem_type));
6992 misalign = DR_MISALIGNMENT (first_dr);
6994 if (dataref_offset == NULL_TREE
6995 && TREE_CODE (dataref_ptr) == SSA_NAME)
6996 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6997 align, misalign);
6998 break;
7000 case dr_explicit_realign:
7002 tree ptr, bump;
7004 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7006 if (compute_in_loop)
7007 msq = vect_setup_realignment (first_stmt, gsi,
7008 &realignment_token,
7009 dr_explicit_realign,
7010 dataref_ptr, NULL);
7012 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7013 ptr = copy_ssa_name (dataref_ptr);
7014 else
7015 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7016 new_stmt = gimple_build_assign
7017 (ptr, BIT_AND_EXPR, dataref_ptr,
7018 build_int_cst
7019 (TREE_TYPE (dataref_ptr),
7020 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7021 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7022 data_ref
7023 = build2 (MEM_REF, vectype, ptr,
7024 build_int_cst (reference_alias_ptr_type
7025 (DR_REF (first_dr)), 0));
7026 vec_dest = vect_create_destination_var (scalar_dest,
7027 vectype);
7028 new_stmt = gimple_build_assign (vec_dest, data_ref);
7029 new_temp = make_ssa_name (vec_dest, new_stmt);
7030 gimple_assign_set_lhs (new_stmt, new_temp);
7031 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7032 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7034 msq = new_temp;
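		/* Compute the address of the second aligned load:
		   floor (dataref_ptr + VS * elem_size - 1), i.e. the
		   aligned vector covering the last accessed byte.  */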
7036 bump = size_binop (MULT_EXPR, vs,
7037 TYPE_SIZE_UNIT (elem_type));
7038 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7039 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7040 new_stmt = gimple_build_assign
7041 (NULL_TREE, BIT_AND_EXPR, ptr,
7042 build_int_cst
7043 (TREE_TYPE (ptr),
7044 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7045 ptr = copy_ssa_name (ptr, new_stmt);
7046 gimple_assign_set_lhs (new_stmt, ptr);
7047 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7048 data_ref
7049 = build2 (MEM_REF, vectype, ptr,
7050 build_int_cst (reference_alias_ptr_type
7051 (DR_REF (first_dr)), 0));
7052 break;
7054 case dr_explicit_realign_optimized:
7055 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7056 new_temp = copy_ssa_name (dataref_ptr);
7057 else
7058 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7059 new_stmt = gimple_build_assign
7060 (new_temp, BIT_AND_EXPR, dataref_ptr,
7061 build_int_cst
7062 (TREE_TYPE (dataref_ptr),
7063 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7065 data_ref
7066 = build2 (MEM_REF, vectype, new_temp,
7067 build_int_cst (reference_alias_ptr_type
7068 (DR_REF (first_dr)), 0));
7069 break;
7070 default:
7071 gcc_unreachable ();
7073 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7074 new_stmt = gimple_build_assign (vec_dest, data_ref);
7075 new_temp = make_ssa_name (vec_dest, new_stmt);
7076 gimple_assign_set_lhs (new_stmt, new_temp);
7077 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7079 /* 3. Handle explicit realignment if necessary/supported.
7080 Create in loop:
7081 vec_dest = realign_load (msq, lsq, realignment_token) */
7082 if (alignment_support_scheme == dr_explicit_realign_optimized
7083 || alignment_support_scheme == dr_explicit_realign)
7085 lsq = gimple_assign_lhs (new_stmt);
7086 if (!realignment_token)
7087 realignment_token = dataref_ptr;
7088 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7089 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7090 msq, lsq, realignment_token);
7091 new_temp = make_ssa_name (vec_dest, new_stmt);
7092 gimple_assign_set_lhs (new_stmt, new_temp);
7093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7095 if (alignment_support_scheme == dr_explicit_realign_optimized)
7097 gcc_assert (phi);
7098 if (i == vec_num - 1 && j == ncopies - 1)
7099 add_phi_arg (phi, lsq,
7100 loop_latch_edge (containing_loop),
7101 UNKNOWN_LOCATION);
7102 msq = lsq;
7106 /* 4. Handle invariant-load. */
7107 if (inv_p && !bb_vinfo)
7109 gcc_assert (!grouped_load);
7110 /* If we have versioned for aliasing or the loop doesn't
7111 have any data dependencies that would preclude this,
7112 then we are sure this is a loop invariant load and
7113 thus we can insert it on the preheader edge. */
7114 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7115 && !nested_in_vect_loop
7116 && hoist_defs_of_uses (stmt, loop))
7118 if (dump_enabled_p ())
7120 dump_printf_loc (MSG_NOTE, vect_location,
7121 "hoisting out of the vectorized "
7122 "loop: ");
7123 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7125 tree tem = copy_ssa_name (scalar_dest);
7126 gsi_insert_on_edge_immediate
7127 (loop_preheader_edge (loop),
7128 gimple_build_assign (tem,
7129 unshare_expr
7130 (gimple_assign_rhs1 (stmt))));
7131 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7133 else
7135 gimple_stmt_iterator gsi2 = *gsi;
7136 gsi_next (&gsi2);
7137 new_temp = vect_init_vector (stmt, scalar_dest,
7138 vectype, &gsi2);
7140 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7141 set_vinfo_for_stmt (new_stmt,
7142 new_stmt_vec_info (new_stmt, vinfo));
7145 if (negative)
7147 tree perm_mask = perm_mask_for_reverse (vectype);
7148 new_temp = permute_vec_elements (new_temp, new_temp,
7149 perm_mask, stmt, gsi);
7150 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7153 /* Collect vector loads and later create their permutation in
7154 vect_transform_grouped_load (). */
7155 if (grouped_load || slp_perm)
7156 dr_chain.quick_push (new_temp);
7158 /* Store vector loads in the corresponding SLP_NODE. */
7159 if (slp && !slp_perm)
7160 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7162 /* Bump the vector pointer to account for a gap or for excess
7163 elements loaded for a permuted SLP load. */
7164 if (group_gap_adj != 0)
7166 bool ovf;
7167 tree bump
7168 = wide_int_to_tree (sizetype,
7169 wi::smul (TYPE_SIZE_UNIT (elem_type),
7170 group_gap_adj, &ovf));
7171 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7172 stmt, bump);
7176 if (slp && !slp_perm)
7177 continue;
7179 if (slp_perm)
7181 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7182 slp_node_instance, false))
7184 dr_chain.release ();
7185 return false;
7188 else
7190 if (grouped_load)
7192 if (!load_lanes_p)
7193 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7194 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7196 else
7198 if (j == 0)
7199 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7200 else
7201 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7202 prev_stmt_info = vinfo_for_stmt (new_stmt);
7205 dr_chain.release ();
7208 return true;
7211 /* Function vect_is_simple_cond.
7213 Input:
 7214    VINFO - the vectorization info of the loop or basic block being vectorized.
7215 COND - Condition that is checked for simple use.
7217 Output:
7218 *COMP_VECTYPE - the vector type for the comparison.
7220 Returns whether a COND can be vectorized. Checks whether
 7221    condition operands are supportable using vect_is_simple_use_1.  */
7223 static bool
7224 vect_is_simple_cond (tree cond, gimple *stmt, vec_info *vinfo,
7225 tree *comp_vectype)
7227 tree lhs, rhs;
7228 tree def;
7229 enum vect_def_type dt;
7230 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7232 if (!COMPARISON_CLASS_P (cond))
7233 return false;
7235 lhs = TREE_OPERAND (cond, 0);
7236 rhs = TREE_OPERAND (cond, 1);
7238 if (TREE_CODE (lhs) == SSA_NAME)
7240 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7241 if (!vect_is_simple_use_1 (lhs, stmt, vinfo,
7242 &lhs_def_stmt, &def, &dt, &vectype1))
7243 return false;
7245 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7246 && TREE_CODE (lhs) != FIXED_CST)
7247 return false;
7249 if (TREE_CODE (rhs) == SSA_NAME)
7251 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7252 if (!vect_is_simple_use_1 (rhs, stmt, vinfo,
7253 &rhs_def_stmt, &def, &dt, &vectype2))
7254 return false;
7256 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7257 && TREE_CODE (rhs) != FIXED_CST)
7258 return false;
7260 *comp_vectype = vectype1 ? vectype1 : vectype2;
7261 return true;
7264 /* vectorizable_condition.
 7266    Check if STMT is a conditional modify expression that can be vectorized.
7267 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7268 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7269 at GSI.
7271 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7272 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7273 else clause if it is 2).
7275 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7277 bool
7278 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7279 gimple **vec_stmt, tree reduc_def, int reduc_index,
7280 slp_tree slp_node)
7282 tree scalar_dest = NULL_TREE;
7283 tree vec_dest = NULL_TREE;
7284 tree cond_expr, then_clause, else_clause;
7285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7286 tree comp_vectype = NULL_TREE;
7287 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7288 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7289 tree vec_compare, vec_cond_expr;
7290 tree new_temp;
7291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7292 tree def;
7293 enum vect_def_type dt, dts[4];
7294 int ncopies;
7295 enum tree_code code;
7296 stmt_vec_info prev_stmt_info = NULL;
7297 int i, j;
7298 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7299 vec<tree> vec_oprnds0 = vNULL;
7300 vec<tree> vec_oprnds1 = vNULL;
7301 vec<tree> vec_oprnds2 = vNULL;
7302 vec<tree> vec_oprnds3 = vNULL;
7303 tree vec_cmp_type;
7305 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7306 return false;
7308 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7309 return false;
7311 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7312 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7313 && reduc_def))
7314 return false;
7316 /* FORNOW: not yet supported. */
7317 if (STMT_VINFO_LIVE_P (stmt_info))
7319 if (dump_enabled_p ())
7320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7321 "value used after loop.\n");
7322 return false;
7325 /* Is vectorizable conditional operation? */
7326 if (!is_gimple_assign (stmt))
7327 return false;
7329 code = gimple_assign_rhs_code (stmt);
7331 if (code != COND_EXPR)
7332 return false;
7334 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7335 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7337 if (slp_node || PURE_SLP_STMT (stmt_info))
7338 ncopies = 1;
7339 else
7340 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7342 gcc_assert (ncopies >= 1);
7343 if (reduc_index && ncopies > 1)
7344 return false; /* FORNOW */
7346 cond_expr = gimple_assign_rhs1 (stmt);
7347 then_clause = gimple_assign_rhs2 (stmt);
7348 else_clause = gimple_assign_rhs3 (stmt);
7350 if (!vect_is_simple_cond (cond_expr, stmt, stmt_info->vinfo, &comp_vectype)
7351 || !comp_vectype)
7352 return false;
7354 if (TREE_CODE (then_clause) == SSA_NAME)
7356 gimple *then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7357 if (!vect_is_simple_use (then_clause, stmt, stmt_info->vinfo,
7358 &then_def_stmt, &def, &dt))
7359 return false;
7361 else if (TREE_CODE (then_clause) != INTEGER_CST
7362 && TREE_CODE (then_clause) != REAL_CST
7363 && TREE_CODE (then_clause) != FIXED_CST)
7364 return false;
7366 if (TREE_CODE (else_clause) == SSA_NAME)
7368 gimple *else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7369 if (!vect_is_simple_use (else_clause, stmt, stmt_info->vinfo,
7370 &else_def_stmt, &def, &dt))
7371 return false;
7373 else if (TREE_CODE (else_clause) != INTEGER_CST
7374 && TREE_CODE (else_clause) != REAL_CST
7375 && TREE_CODE (else_clause) != FIXED_CST)
7376 return false;
7378 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
 7379   /* The result of a vector comparison should be of signed integer type.  */
7380 tree cmp_type = build_nonstandard_integer_type (prec, 0);
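   /* E.g. for a V4SF vectype this is a 32-bit signed integer type and
      VEC_CMP_TYPE below becomes V4SI.  */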
7381 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7382 if (vec_cmp_type == NULL_TREE)
7383 return false;
7385 if (!vec_stmt)
7387 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7388 return expand_vec_cond_expr_p (vectype, comp_vectype);
7391 /* Transform. */
7393 if (!slp_node)
7395 vec_oprnds0.create (1);
7396 vec_oprnds1.create (1);
7397 vec_oprnds2.create (1);
7398 vec_oprnds3.create (1);
7401 /* Handle def. */
7402 scalar_dest = gimple_assign_lhs (stmt);
7403 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7405 /* Handle cond expr. */
7406 for (j = 0; j < ncopies; j++)
7408 gassign *new_stmt = NULL;
7409 if (j == 0)
7411 if (slp_node)
7413 auto_vec<tree, 4> ops;
7414 auto_vec<vec<tree>, 4> vec_defs;
7416 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7417 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7418 ops.safe_push (then_clause);
7419 ops.safe_push (else_clause);
7420 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7421 vec_oprnds3 = vec_defs.pop ();
7422 vec_oprnds2 = vec_defs.pop ();
7423 vec_oprnds1 = vec_defs.pop ();
7424 vec_oprnds0 = vec_defs.pop ();
7426 ops.release ();
7427 vec_defs.release ();
7429 else
7431 gimple *gtemp;
7432 vec_cond_lhs =
7433 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7434 stmt, NULL);
7435 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7436 loop_vinfo, &gtemp, &def, &dts[0]);
7438 vec_cond_rhs =
7439 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7440 stmt, NULL);
7441 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7442 loop_vinfo, &gtemp, &def, &dts[1]);
7443 if (reduc_index == 1)
7444 vec_then_clause = reduc_def;
7445 else
7447 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7448 stmt, NULL);
7449 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7450 &gtemp, &def, &dts[2]);
7452 if (reduc_index == 2)
7453 vec_else_clause = reduc_def;
7454 else
7456 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7457 stmt, NULL);
7458 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7459 &gtemp, &def, &dts[3]);
7463 else
7465 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7466 vec_oprnds0.pop ());
7467 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7468 vec_oprnds1.pop ());
7469 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7470 vec_oprnds2.pop ());
7471 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7472 vec_oprnds3.pop ());
7475 if (!slp_node)
7477 vec_oprnds0.quick_push (vec_cond_lhs);
7478 vec_oprnds1.quick_push (vec_cond_rhs);
7479 vec_oprnds2.quick_push (vec_then_clause);
7480 vec_oprnds3.quick_push (vec_else_clause);
7483 /* Arguments are ready. Create the new vector stmt. */
7484 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7486 vec_cond_rhs = vec_oprnds1[i];
7487 vec_then_clause = vec_oprnds2[i];
7488 vec_else_clause = vec_oprnds3[i];
7490 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7491 vec_cond_lhs, vec_cond_rhs);
7492 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7493 vec_compare, vec_then_clause, vec_else_clause);
7495 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7496 new_temp = make_ssa_name (vec_dest, new_stmt);
7497 gimple_assign_set_lhs (new_stmt, new_temp);
7498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7499 if (slp_node)
7500 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7503 if (slp_node)
7504 continue;
7506 if (j == 0)
7507 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7508 else
7509 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7511 prev_stmt_info = vinfo_for_stmt (new_stmt);
7514 vec_oprnds0.release ();
7515 vec_oprnds1.release ();
7516 vec_oprnds2.release ();
7517 vec_oprnds3.release ();
7519 return true;
7523 /* Make sure the statement is vectorizable. */
7525 bool
7526 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7528 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7529 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7530 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7531 bool ok;
7532 tree scalar_type, vectype;
7533 gimple *pattern_stmt;
7534 gimple_seq pattern_def_seq;
7536 if (dump_enabled_p ())
7538 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7539 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7542 if (gimple_has_volatile_ops (stmt))
7544 if (dump_enabled_p ())
7545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7546 "not vectorized: stmt has volatile operands\n");
7548 return false;
7551 /* Skip stmts that do not need to be vectorized. In loops this is expected
7552 to include:
7553 - the COND_EXPR which is the loop exit condition
7554 - any LABEL_EXPRs in the loop
7555 - computations that are used only for array indexing or loop control.
7556 In basic blocks we only analyze statements that are a part of some SLP
7557 instance, therefore, all the statements are relevant.
7559 The pattern statement needs to be analyzed instead of the original statement
7560 if the original statement is not relevant. Otherwise, we analyze both
7561 statements. In basic blocks we are called from some SLP instance
7562 traversal; there we don't analyze pattern stmts, as the pattern stmts
7563 will already be part of an SLP instance. */
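/* Illustrative sketch only (not part of this file; names are made up):

     void add1 (int *a, const int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] + 1;
     }

   Here the increment of i and the exit test i < n exist only for loop
   control and are skipped, while the load of b[i], the addition and the
   store to a[i] are the relevant statements that get analyzed.  */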
7565 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7566 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7567 && !STMT_VINFO_LIVE_P (stmt_info))
7569 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7570 && pattern_stmt
7571 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7572 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7574 /* Analyze PATTERN_STMT instead of the original stmt. */
7575 stmt = pattern_stmt;
7576 stmt_info = vinfo_for_stmt (pattern_stmt);
7577 if (dump_enabled_p ())
7579 dump_printf_loc (MSG_NOTE, vect_location,
7580 "==> examining pattern statement: ");
7581 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7584 else
7586 if (dump_enabled_p ())
7587 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7589 return true;
7592 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7593 && node == NULL
7594 && pattern_stmt
7595 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7596 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7598 /* Analyze PATTERN_STMT too. */
7599 if (dump_enabled_p ())
7601 dump_printf_loc (MSG_NOTE, vect_location,
7602 "==> examining pattern statement: ");
7603 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7606 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7607 return false;
7610 if (is_pattern_stmt_p (stmt_info)
7611 && node == NULL
7612 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7614 gimple_stmt_iterator si;
7616 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7618 gimple *pattern_def_stmt = gsi_stmt (si);
7619 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7620 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7622 /* Analyze def stmt of STMT if it's a pattern stmt. */
7623 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_NOTE, vect_location,
7626 "==> examining pattern def statement: ");
7627 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7630 if (!vect_analyze_stmt (pattern_def_stmt,
7631 need_to_vectorize, node))
7632 return false;
7637 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7639 case vect_internal_def:
7640 break;
7642 case vect_reduction_def:
7643 case vect_nested_cycle:
7644 gcc_assert (!bb_vinfo
7645 && (relevance == vect_used_in_outer
7646 || relevance == vect_used_in_outer_by_reduction
7647 || relevance == vect_used_by_reduction
7648 || relevance == vect_unused_in_scope));
7649 break;
7651 case vect_induction_def:
7652 case vect_constant_def:
7653 case vect_external_def:
7654 case vect_unknown_def_type:
7655 default:
7656 gcc_unreachable ();
7659 if (bb_vinfo)
7661 gcc_assert (PURE_SLP_STMT (stmt_info));
7663 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7664 if (dump_enabled_p ())
7666 dump_printf_loc (MSG_NOTE, vect_location,
7667 "get vectype for scalar type: ");
7668 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7669 dump_printf (MSG_NOTE, "\n");
7672 vectype = get_vectype_for_scalar_type (scalar_type);
7673 if (!vectype)
7675 if (dump_enabled_p ())
7677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7678 "not SLPed: unsupported data-type ");
7679 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7680 scalar_type);
7681 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7683 return false;
7686 if (dump_enabled_p ())
7688 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7689 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7690 dump_printf (MSG_NOTE, "\n");
7693 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7696 if (STMT_VINFO_RELEVANT_P (stmt_info))
7698 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7699 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7700 || (is_gimple_call (stmt)
7701 && gimple_call_lhs (stmt) == NULL_TREE));
7702 *need_to_vectorize = true;
7705 if (PURE_SLP_STMT (stmt_info) && !node)
7707 dump_printf_loc (MSG_NOTE, vect_location,
7708 "handled only by SLP analysis\n");
7709 return true;
7712 ok = true;
7713 if (!bb_vinfo
7714 && (STMT_VINFO_RELEVANT_P (stmt_info)
7715 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7716 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7717 || vectorizable_conversion (stmt, NULL, NULL, node)
7718 || vectorizable_shift (stmt, NULL, NULL, node)
7719 || vectorizable_operation (stmt, NULL, NULL, node)
7720 || vectorizable_assignment (stmt, NULL, NULL, node)
7721 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7722 || vectorizable_call (stmt, NULL, NULL, node)
7723 || vectorizable_store (stmt, NULL, NULL, node)
7724 || vectorizable_reduction (stmt, NULL, NULL, node)
7725 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7726 else
7728 if (bb_vinfo)
7729 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7730 || vectorizable_conversion (stmt, NULL, NULL, node)
7731 || vectorizable_shift (stmt, NULL, NULL, node)
7732 || vectorizable_operation (stmt, NULL, NULL, node)
7733 || vectorizable_assignment (stmt, NULL, NULL, node)
7734 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7735 || vectorizable_call (stmt, NULL, NULL, node)
7736 || vectorizable_store (stmt, NULL, NULL, node)
7737 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7740 if (!ok)
7742 if (dump_enabled_p ())
7744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7745 "not vectorized: relevant stmt not ");
7746 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7747 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7750 return false;
7753 if (bb_vinfo)
7754 return true;
7756 /* Stmts that are (also) "live" (i.e., used outside the loop)
7757 need extra handling, except for vectorizable reductions. */
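/* Illustrative sketch of a "live" statement (not part of this file; names
   are made up):

     int last_double (const int *a, int n)
     {
       int t = 0;
       for (int i = 0; i < n; i++)
         t = a[i] * 2;
       return t;       // t is used after the loop, so its def is "live"
     }

   The statement defining t needs the extra handling checked for below
   because its value escapes the loop.  */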
7758 if (STMT_VINFO_LIVE_P (stmt_info)
7759 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7760 ok = vectorizable_live_operation (stmt, NULL, NULL);
7762 if (!ok)
7764 if (dump_enabled_p ())
7766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7767 "not vectorized: live stmt not ");
7768 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7769 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7772 return false;
7775 return true;
7779 /* Function vect_transform_stmt.
7781 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7783 bool
7784 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
7785 bool *grouped_store, slp_tree slp_node,
7786 slp_instance slp_node_instance)
7788 bool is_store = false;
7789 gimple *vec_stmt = NULL;
7790 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7791 bool done;
7793 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7795 switch (STMT_VINFO_TYPE (stmt_info))
7797 case type_demotion_vec_info_type:
7798 case type_promotion_vec_info_type:
7799 case type_conversion_vec_info_type:
7800 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7801 gcc_assert (done);
7802 break;
7804 case induc_vec_info_type:
7805 gcc_assert (!slp_node);
7806 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7807 gcc_assert (done);
7808 break;
7810 case shift_vec_info_type:
7811 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7812 gcc_assert (done);
7813 break;
7815 case op_vec_info_type:
7816 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7817 gcc_assert (done);
7818 break;
7820 case assignment_vec_info_type:
7821 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7822 gcc_assert (done);
7823 break;
7825 case load_vec_info_type:
7826 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7827 slp_node_instance);
7828 gcc_assert (done);
7829 break;
7831 case store_vec_info_type:
7832 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7833 gcc_assert (done);
7834 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7836 /* In case of interleaving, the whole chain is vectorized when the
7837 last store in the chain is reached. Store stmts before the last
7838 one are skipped, and their vec_stmt_info shouldn't be freed
7839 meanwhile. */
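/* Illustrative sketch of such a store group (not part of this file; names
   are made up):

     void interleave (int *out, const int *a, const int *b, int n)
     {
       for (int i = 0; i < n; i++)
         {
           out[2 * i]     = a[i];   // earlier member of the group
           out[2 * i + 1] = b[i];   // last member: chain emitted here
         }
     }

   Both stores form one interleaving chain; vector stores are generated only
   once the last member is reached.  */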
7840 *grouped_store = true;
7841 if (STMT_VINFO_VEC_STMT (stmt_info))
7842 is_store = true;
7844 else
7845 is_store = true;
7846 break;
7848 case condition_vec_info_type:
7849 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7850 gcc_assert (done);
7851 break;
7853 case call_vec_info_type:
7854 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7855 stmt = gsi_stmt (*gsi);
7856 if (is_gimple_call (stmt)
7857 && gimple_call_internal_p (stmt)
7858 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7859 is_store = true;
7860 break;
7862 case call_simd_clone_vec_info_type:
7863 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7864 stmt = gsi_stmt (*gsi);
7865 break;
7867 case reduc_vec_info_type:
7868 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7869 gcc_assert (done);
7870 break;
7872 default:
7873 if (!STMT_VINFO_LIVE_P (stmt_info))
7875 if (dump_enabled_p ())
7876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7877 "stmt not supported.\n");
7878 gcc_unreachable ();
7882 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
7883 This would break hybrid SLP vectorization. */
7884 if (slp_node)
7885 gcc_assert (!vec_stmt
7886 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
7888 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7889 is being vectorized, but outside the immediately enclosing loop. */
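/* Illustrative sketch of the situation handled here (not part of this
   file; names are made up): during outer-loop vectorization of

     void nest (int *b, const int *c, int n, int m)
     {
       for (int i = 0; i < n; i++)        // outer loop, vectorized
         {
           int t = 0;
           for (int j = 0; j < m; j++)    // inner loop
             t = c[j] + i;                // def in the inner loop
           b[i] = t;                      // use in the outer loop
         }
     }

   the def of t from the inner loop is used outside it; its vectorized stmt
   is recorded on the inner-loop exit phi so outer-loop uses can find it.  */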
7890 if (vec_stmt
7891 && STMT_VINFO_LOOP_VINFO (stmt_info)
7892 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7893 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7894 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7895 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7896 || STMT_VINFO_RELEVANT (stmt_info) ==
7897 vect_used_in_outer_by_reduction))
7899 struct loop *innerloop = LOOP_VINFO_LOOP (
7900 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7901 imm_use_iterator imm_iter;
7902 use_operand_p use_p;
7903 tree scalar_dest;
7904 gimple *exit_phi;
7906 if (dump_enabled_p ())
7907 dump_printf_loc (MSG_NOTE, vect_location,
7908 "Record the vdef for outer-loop vectorization.\n");
7910 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7911 (to be used when vectorizing outer-loop stmts that use the DEF of
7912 STMT). */
7913 if (gimple_code (stmt) == GIMPLE_PHI)
7914 scalar_dest = PHI_RESULT (stmt);
7915 else
7916 scalar_dest = gimple_assign_lhs (stmt);
7918 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7920 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7922 exit_phi = USE_STMT (use_p);
7923 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7928 /* Handle stmts whose DEF is used outside the loop-nest that is
7929 being vectorized. */
7930 if (STMT_VINFO_LIVE_P (stmt_info)
7931 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7933 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7934 gcc_assert (done);
7937 if (vec_stmt)
7938 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7940 return is_store;
7944 /* Remove a group of stores (for SLP or interleaving), free their
7945 stmt_vec_info. */
7947 void
7948 vect_remove_stores (gimple *first_stmt)
7950 gimple *next = first_stmt;
7951 gimple *tmp;
7952 gimple_stmt_iterator next_si;
7954 while (next)
7956 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7958 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7959 if (is_pattern_stmt_p (stmt_info))
7960 next = STMT_VINFO_RELATED_STMT (stmt_info);
7961 /* Free the attached stmt_vec_info and remove the stmt. */
7962 next_si = gsi_for_stmt (next);
7963 unlink_stmt_vdef (next);
7964 gsi_remove (&next_si, true);
7965 release_defs (next);
7966 free_stmt_vec_info (next);
7967 next = tmp;
7972 /* Function new_stmt_vec_info.
7974 Create and initialize a new stmt_vec_info struct for STMT. */
7976 stmt_vec_info
7977 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
7979 stmt_vec_info res;
7980 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7982 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7983 STMT_VINFO_STMT (res) = stmt;
7984 res->vinfo = vinfo;
7985 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7986 STMT_VINFO_LIVE_P (res) = false;
7987 STMT_VINFO_VECTYPE (res) = NULL;
7988 STMT_VINFO_VEC_STMT (res) = NULL;
7989 STMT_VINFO_VECTORIZABLE (res) = true;
7990 STMT_VINFO_IN_PATTERN_P (res) = false;
7991 STMT_VINFO_RELATED_STMT (res) = NULL;
7992 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7993 STMT_VINFO_DATA_REF (res) = NULL;
7995 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7996 STMT_VINFO_DR_OFFSET (res) = NULL;
7997 STMT_VINFO_DR_INIT (res) = NULL;
7998 STMT_VINFO_DR_STEP (res) = NULL;
7999 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8001 if (gimple_code (stmt) == GIMPLE_PHI
8002 && is_loop_header_bb_p (gimple_bb (stmt)))
8003 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8004 else
8005 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8007 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8008 STMT_SLP_TYPE (res) = loop_vect;
8009 GROUP_FIRST_ELEMENT (res) = NULL;
8010 GROUP_NEXT_ELEMENT (res) = NULL;
8011 GROUP_SIZE (res) = 0;
8012 GROUP_STORE_COUNT (res) = 0;
8013 GROUP_GAP (res) = 0;
8014 GROUP_SAME_DR_STMT (res) = NULL;
8016 return res;
8020 /* Create the global vector for stmt_vec_info structs. */
8022 void
8023 init_stmt_vec_info_vec (void)
8025 gcc_assert (!stmt_vec_info_vec.exists ());
8026 stmt_vec_info_vec.create (50);
8030 /* Free the global vector of stmt_vec_info structs. */
8032 void
8033 free_stmt_vec_info_vec (void)
8035 unsigned int i;
8036 stmt_vec_info info;
8037 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8038 if (info != NULL)
8039 free_stmt_vec_info (STMT_VINFO_STMT (info));
8040 gcc_assert (stmt_vec_info_vec.exists ());
8041 stmt_vec_info_vec.release ();
8045 /* Free stmt vectorization related info. */
8047 void
8048 free_stmt_vec_info (gimple *stmt)
8050 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8052 if (!stmt_info)
8053 return;
8055 /* Check if this statement has a related "pattern stmt"
8056 (introduced by the vectorizer during the pattern recognition
8057 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8058 too. */
8059 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8061 stmt_vec_info patt_info
8062 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8063 if (patt_info)
8065 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8066 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8067 gimple_set_bb (patt_stmt, NULL);
8068 tree lhs = gimple_get_lhs (patt_stmt);
8069 if (TREE_CODE (lhs) == SSA_NAME)
8070 release_ssa_name (lhs);
8071 if (seq)
8073 gimple_stmt_iterator si;
8074 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8076 gimple *seq_stmt = gsi_stmt (si);
8077 gimple_set_bb (seq_stmt, NULL);
8078 lhs = gimple_get_lhs (seq_stmt);
8079 if (TREE_CODE (lhs) == SSA_NAME)
8080 release_ssa_name (lhs);
8081 free_stmt_vec_info (seq_stmt);
8084 free_stmt_vec_info (patt_stmt);
8088 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8089 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8090 set_vinfo_for_stmt (stmt, NULL);
8091 free (stmt_info);
8095 /* Function get_vectype_for_scalar_type_and_size.
8097 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8098 by the target. */
8100 static tree
8101 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8103 machine_mode inner_mode = TYPE_MODE (scalar_type);
8104 machine_mode simd_mode;
8105 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8106 int nunits;
8107 tree vectype;
8109 if (nbytes == 0)
8110 return NULL_TREE;
8112 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8113 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8114 return NULL_TREE;
8116 /* For vector types of elements whose mode precision doesn't
8117 match their type's precision we use an element type of mode
8118 precision. The vectorization routines will have to make sure
8119 they support the proper result truncation/extension.
8120 We also make sure to build vector types with INTEGER_TYPE
8121 component type only. */
8122 if (INTEGRAL_TYPE_P (scalar_type)
8123 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8124 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8125 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8126 TYPE_UNSIGNED (scalar_type));
8128 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8129 When the component mode passes the above test simply use a type
8130 corresponding to that mode. The theory is that any use that
8131 would cause problems with this will disable vectorization anyway. */
8132 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8133 && !INTEGRAL_TYPE_P (scalar_type))
8134 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8136 /* We can't build a vector type of elements with alignment bigger than
8137 their size. */
8138 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8139 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8140 TYPE_UNSIGNED (scalar_type));
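/* Illustrative example of the precision adjustment above (not part of this
   file; names are made up): for an element type such as

     _Bool flags[16];   // TYPE_PRECISION is 1, but its QImode has 8 bits

   the element type is replaced by an 8-bit unsigned INTEGER_TYPE so that
   vector elements match the mode precision; the vectorization routines then
   have to perform the required truncations/extensions themselves.  */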
8142 /* If we fell back to using the mode, fail if there was
8143 no scalar type for it. */
8144 if (scalar_type == NULL_TREE)
8145 return NULL_TREE;
8147 /* If no size was supplied, use the mode the target prefers. Otherwise
8148 look up a vector mode of the specified size. */
8149 if (size == 0)
8150 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8151 else
8152 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8153 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8154 if (nunits <= 1)
8155 return NULL_TREE;
8157 vectype = build_vector_type (scalar_type, nunits);
8159 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8160 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8161 return NULL_TREE;
8163 return vectype;
8166 unsigned int current_vector_size;
8168 /* Function get_vectype_for_scalar_type.
8170 Returns the vector type corresponding to SCALAR_TYPE as supported
8171 by the target. */
8173 tree
8174 get_vectype_for_scalar_type (tree scalar_type)
8176 tree vectype;
8177 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8178 current_vector_size);
8179 if (vectype
8180 && current_vector_size == 0)
8181 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8182 return vectype;
8185 /* Function get_same_sized_vectype
8187 Returns a vector type corresponding to SCALAR_TYPE of size
8188 VECTOR_TYPE if supported by the target. */
8190 tree
8191 get_same_sized_vectype (tree scalar_type, tree vector_type)
8193 return get_vectype_for_scalar_type_and_size
8194 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8197 /* Function vect_is_simple_use.
8199 Input:
8200 LOOP_VINFO - the vect info of the loop that is being vectorized.
8201 BB_VINFO - the vect info of the basic block that is being vectorized.
8202 OPERAND - operand of STMT in the loop or bb.
8203 DEF - the defining stmt in case OPERAND is an SSA_NAME.
8205 Returns whether a stmt with OPERAND can be vectorized.
8206 For loops, supportable operands are constants, loop invariants, and operands
8207 that are defined by the current iteration of the loop. Unsupportable
8208 operands are those that are defined by a previous iteration of the loop (as
8209 is the case in reduction/induction computations).
8210 For basic blocks, supportable operands are constants and bb invariants.
8211 For now, operands defined outside the basic block are not supported. */
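/* Illustrative sketch of the classification (not part of this file; names
   are made up):

     int scale_sum (const int *a, int k, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s += a[i] * k + 4;
       return s;
     }

   In the loop body, 4 is a constant def, k is loop-invariant and therefore
   an external def, a[i] and the multiplication are internal defs of the
   current iteration, and s is carried over from the previous iteration, so
   its cycle is treated as a reduction rather than a simple internal def.  */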
8213 bool
8214 vect_is_simple_use (tree operand, gimple *stmt, vec_info *vinfo,
8215 gimple **def_stmt, tree *def, enum vect_def_type *dt)
8217 *def_stmt = NULL;
8218 *def = NULL_TREE;
8219 *dt = vect_unknown_def_type;
8221 if (dump_enabled_p ())
8223 dump_printf_loc (MSG_NOTE, vect_location,
8224 "vect_is_simple_use: operand ");
8225 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8226 dump_printf (MSG_NOTE, "\n");
8229 if (CONSTANT_CLASS_P (operand))
8231 *dt = vect_constant_def;
8232 return true;
8235 if (is_gimple_min_invariant (operand))
8237 *def = operand;
8238 *dt = vect_external_def;
8239 return true;
8242 if (TREE_CODE (operand) != SSA_NAME)
8244 if (dump_enabled_p ())
8245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8246 "not ssa-name.\n");
8247 return false;
8250 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8252 *def = operand;
8253 *dt = vect_external_def;
8254 return true;
8257 *def_stmt = SSA_NAME_DEF_STMT (operand);
8258 if (dump_enabled_p ())
8260 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8261 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8264 basic_block bb = gimple_bb (*def_stmt);
8265 if ((is_a <loop_vec_info> (vinfo)
8266 && !flow_bb_inside_loop_p (as_a <loop_vec_info> (vinfo)->loop, bb))
8267 || (is_a <bb_vec_info> (vinfo)
8268 && (bb != as_a <bb_vec_info> (vinfo)->bb
8269 || gimple_code (*def_stmt) == GIMPLE_PHI)))
8270 *dt = vect_external_def;
8271 else
8273 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8274 if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
8275 *dt = vect_external_def;
8276 else
8277 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8280 if (dump_enabled_p ())
8282 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8283 switch (*dt)
8285 case vect_uninitialized_def:
8286 dump_printf (MSG_NOTE, "uninitialized\n");
8287 break;
8288 case vect_constant_def:
8289 dump_printf (MSG_NOTE, "constant\n");
8290 break;
8291 case vect_external_def:
8292 dump_printf (MSG_NOTE, "external\n");
8293 break;
8294 case vect_internal_def:
8295 dump_printf (MSG_NOTE, "internal\n");
8296 break;
8297 case vect_induction_def:
8298 dump_printf (MSG_NOTE, "induction\n");
8299 break;
8300 case vect_reduction_def:
8301 dump_printf (MSG_NOTE, "reduction\n");
8302 break;
8303 case vect_double_reduction_def:
8304 dump_printf (MSG_NOTE, "double reduction\n");
8305 break;
8306 case vect_nested_cycle:
8307 dump_printf (MSG_NOTE, "nested cycle\n");
8308 break;
8309 case vect_unknown_def_type:
8310 dump_printf (MSG_NOTE, "unknown\n");
8311 break;
8315 if (*dt == vect_unknown_def_type
8316 || (stmt
8317 && *dt == vect_double_reduction_def
8318 && gimple_code (stmt) != GIMPLE_PHI))
8320 if (dump_enabled_p ())
8321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8322 "Unsupported pattern.\n");
8323 return false;
8326 switch (gimple_code (*def_stmt))
8328 case GIMPLE_PHI:
8329 *def = gimple_phi_result (*def_stmt);
8330 break;
8332 case GIMPLE_ASSIGN:
8333 *def = gimple_assign_lhs (*def_stmt);
8334 break;
8336 case GIMPLE_CALL:
8337 *def = gimple_call_lhs (*def_stmt);
8338 if (*def != NULL)
8339 break;
8340 /* FALLTHRU */
8341 default:
8342 if (dump_enabled_p ())
8343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8344 "unsupported defining stmt:\n");
8345 return false;
8348 return true;
8351 /* Function vect_is_simple_use_1.
8353 Same as vect_is_simple_use but also determines the vector operand
8354 type of OPERAND and stores it to *VECTYPE. If the definition of
8355 OPERAND is vect_uninitialized_def, vect_constant_def or
8356 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8357 is responsible for computing the best suited vector type for the
8358 scalar operand. */
8360 bool
8361 vect_is_simple_use_1 (tree operand, gimple *stmt, vec_info *vinfo,
8362 gimple **def_stmt,
8363 tree *def, enum vect_def_type *dt, tree *vectype)
8365 if (!vect_is_simple_use (operand, stmt, vinfo, def_stmt, def, dt))
8366 return false;
8368 /* Now get a vector type if the def is internal, otherwise supply
8369 NULL_TREE and leave it up to the caller to figure out a proper
8370 type for the use stmt. */
8371 if (*dt == vect_internal_def
8372 || *dt == vect_induction_def
8373 || *dt == vect_reduction_def
8374 || *dt == vect_double_reduction_def
8375 || *dt == vect_nested_cycle)
8377 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8379 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8380 && !STMT_VINFO_RELEVANT (stmt_info)
8381 && !STMT_VINFO_LIVE_P (stmt_info))
8382 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8384 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8385 gcc_assert (*vectype != NULL_TREE);
8387 else if (*dt == vect_uninitialized_def
8388 || *dt == vect_constant_def
8389 || *dt == vect_external_def)
8390 *vectype = NULL_TREE;
8391 else
8392 gcc_unreachable ();
8394 return true;
8398 /* Function supportable_widening_operation
8400 Check whether an operation represented by the code CODE is a
8401 widening operation that is supported by the target platform in
8402 vector form (i.e., when operating on arguments of type VECTYPE_IN
8403 producing a result of type VECTYPE_OUT).
8405 Widening operations we currently support are NOP (CONVERT), FLOAT
8406 and WIDEN_MULT. This function checks if these operations are supported
8407 by the target platform either directly (via vector tree-codes), or via
8408 target builtins.
8410 Output:
8411 - CODE1 and CODE2 are codes of vector operations to be used when
8412 vectorizing the operation, if available.
8413 - MULTI_STEP_CVT determines the number of required intermediate steps in
8414 case of multi-step conversion (like char->short->int - in that case
8415 MULTI_STEP_CVT will be 1).
8416 - INTERM_TYPES contains the intermediate type required to perform the
8417 widening operation (short in the above example). */
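/* Illustrative example of a multi-step widening (not part of this file;
   names are made up):

     void widen (int *dst, const signed char *src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = src[i];   // char -> int conversion
     }

   If the target only provides char->short and short->int unpacks, the
   conversion needs two steps: MULTI_STEP_CVT is then 1 and INTERM_TYPES
   holds the short intermediate type.  */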
8419 bool
8420 supportable_widening_operation (enum tree_code code, gimple *stmt,
8421 tree vectype_out, tree vectype_in,
8422 enum tree_code *code1, enum tree_code *code2,
8423 int *multi_step_cvt,
8424 vec<tree> *interm_types)
8426 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8427 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8428 struct loop *vect_loop = NULL;
8429 machine_mode vec_mode;
8430 enum insn_code icode1, icode2;
8431 optab optab1, optab2;
8432 tree vectype = vectype_in;
8433 tree wide_vectype = vectype_out;
8434 enum tree_code c1, c2;
8435 int i;
8436 tree prev_type, intermediate_type;
8437 machine_mode intermediate_mode, prev_mode;
8438 optab optab3, optab4;
8440 *multi_step_cvt = 0;
8441 if (loop_info)
8442 vect_loop = LOOP_VINFO_LOOP (loop_info);
8444 switch (code)
8446 case WIDEN_MULT_EXPR:
8447 /* The result of a vectorized widening operation usually requires
8448 two vectors (because the widened results do not fit into one vector).
8449 The generated vector results would normally be expected to be
8450 generated in the same order as in the original scalar computation,
8451 i.e. if 8 results are generated in each vector iteration, they are
8452 to be organized as follows:
8453 vect1: [res1,res2,res3,res4],
8454 vect2: [res5,res6,res7,res8].
8456 However, in the special case that the result of the widening
8457 operation is used in a reduction computation only, the order doesn't
8458 matter (because when vectorizing a reduction we change the order of
8459 the computation). Some targets can take advantage of this and
8460 generate more efficient code. For example, targets like Altivec,
8461 that support widen_mult using a sequence of {mult_even,mult_odd}
8462 generate the following vectors:
8463 vect1: [res1,res3,res5,res7],
8464 vect2: [res2,res4,res6,res8].
8466 When vectorizing outer-loops, we execute the inner-loop sequentially
8467 (each vectorized inner-loop iteration contributes to VF outer-loop
8468 iterations in parallel). We therefore don't allow changing the
8469 order of the computation in the inner-loop during outer-loop
8470 vectorization. */
8471 /* TODO: Another case in which order doesn't *really* matter is when we
8472 widen and then contract again, e.g. (short)((int)x * y >> 8).
8473 Normally, pack_trunc performs an even/odd permute, whereas the
8474 repack from an even/odd expansion would be an interleave, which
8475 would be significantly simpler for e.g. AVX2. */
8476 /* In any case, in order to avoid duplicating the code below, recurse
8477 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8478 are properly set up for the caller. If we fail, we'll continue with
8479 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
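/* Illustrative example of the reduction-only case (not part of this file;
   names are made up):

     int dot (const short *a, const short *b, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s += (int) a[i] * b[i];   // widening multiply feeding a reduction
       return s;
     }

   The widened products are only summed, so producing them in even/odd
   order via VEC_WIDEN_MULT_EVEN/ODD_EXPR instead of lo/hi order is
   acceptable and often cheaper, e.g. on Altivec.  */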
8480 if (vect_loop
8481 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8482 && !nested_in_vect_loop_p (vect_loop, stmt)
8483 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8484 stmt, vectype_out, vectype_in,
8485 code1, code2, multi_step_cvt,
8486 interm_types))
8488 /* Elements in a vector with vect_used_by_reduction property cannot
8489 be reordered if the use chain with this property does not have the
8490 same operation. One such example is s += a * b, where elements
8491 in a and b cannot be reordered. Here we check if the vector defined
8492 by STMT is only directly used in the reduction statement. */
8493 tree lhs = gimple_assign_lhs (stmt);
8494 use_operand_p dummy;
8495 gimple *use_stmt;
8496 stmt_vec_info use_stmt_info = NULL;
8497 if (single_imm_use (lhs, &dummy, &use_stmt)
8498 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8499 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8500 return true;
8502 c1 = VEC_WIDEN_MULT_LO_EXPR;
8503 c2 = VEC_WIDEN_MULT_HI_EXPR;
8504 break;
8506 case VEC_WIDEN_MULT_EVEN_EXPR:
8507 /* Support the recursion induced just above. */
8508 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8509 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8510 break;
8512 case WIDEN_LSHIFT_EXPR:
8513 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8514 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8515 break;
8517 CASE_CONVERT:
8518 c1 = VEC_UNPACK_LO_EXPR;
8519 c2 = VEC_UNPACK_HI_EXPR;
8520 break;
8522 case FLOAT_EXPR:
8523 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8524 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8525 break;
8527 case FIX_TRUNC_EXPR:
8528 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8529 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8530 computing the operation. */
8531 return false;
8533 default:
8534 gcc_unreachable ();
8537 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8538 std::swap (c1, c2);
8540 if (code == FIX_TRUNC_EXPR)
8542 /* The signedness is determined from output operand. */
8543 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8544 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8546 else
8548 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8549 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8552 if (!optab1 || !optab2)
8553 return false;
8555 vec_mode = TYPE_MODE (vectype);
8556 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8557 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8558 return false;
8560 *code1 = c1;
8561 *code2 = c2;
8563 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8564 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8565 return true;
8567 /* Check if it's a multi-step conversion that can be done using intermediate
8568 types. */
8570 prev_type = vectype;
8571 prev_mode = vec_mode;
8573 if (!CONVERT_EXPR_CODE_P (code))
8574 return false;
8576 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8577 intermediate steps in the promotion sequence. We try
8578 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8579 not. */
8580 interm_types->create (MAX_INTERM_CVT_STEPS);
8581 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8583 intermediate_mode = insn_data[icode1].operand[0].mode;
8584 intermediate_type
8585 = lang_hooks.types.type_for_mode (intermediate_mode,
8586 TYPE_UNSIGNED (prev_type));
8587 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8588 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8590 if (!optab3 || !optab4
8591 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8592 || insn_data[icode1].operand[0].mode != intermediate_mode
8593 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8594 || insn_data[icode2].operand[0].mode != intermediate_mode
8595 || ((icode1 = optab_handler (optab3, intermediate_mode))
8596 == CODE_FOR_nothing)
8597 || ((icode2 = optab_handler (optab4, intermediate_mode))
8598 == CODE_FOR_nothing))
8599 break;
8601 interm_types->quick_push (intermediate_type);
8602 (*multi_step_cvt)++;
8604 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8605 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8606 return true;
8608 prev_type = intermediate_type;
8609 prev_mode = intermediate_mode;
8612 interm_types->release ();
8613 return false;
8617 /* Function supportable_narrowing_operation
8619 Check whether an operation represented by the code CODE is a
8620 narrowing operation that is supported by the target platform in
8621 vector form (i.e., when operating on arguments of type VECTYPE_IN
8622 and producing a result of type VECTYPE_OUT).
8624 Narrowing operations we currently support are NOP (CONVERT) and
8625 FIX_TRUNC. This function checks if these operations are supported by
8626 the target platform directly via vector tree-codes.
8628 Output:
8629 - CODE1 is the code of a vector operation to be used when
8630 vectorizing the operation, if available.
8631 - MULTI_STEP_CVT determines the number of required intermediate steps in
8632 case of multi-step conversion (like int->short->char - in that case
8633 MULTI_STEP_CVT will be 1).
8634 - INTERM_TYPES contains the intermediate type required to perform the
8635 narrowing operation (short in the above example). */
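/* Illustrative example of a multi-step narrowing (not part of this file;
   names are made up):

     void narrow (signed char *dst, const int *src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = (signed char) src[i];   // int -> char conversion
     }

   With only int->short and short->char pack-trunc support, the conversion
   needs two steps: MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short
   intermediate type.  */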
8637 bool
8638 supportable_narrowing_operation (enum tree_code code,
8639 tree vectype_out, tree vectype_in,
8640 enum tree_code *code1, int *multi_step_cvt,
8641 vec<tree> *interm_types)
8643 machine_mode vec_mode;
8644 enum insn_code icode1;
8645 optab optab1, interm_optab;
8646 tree vectype = vectype_in;
8647 tree narrow_vectype = vectype_out;
8648 enum tree_code c1;
8649 tree intermediate_type;
8650 machine_mode intermediate_mode, prev_mode;
8651 int i;
8652 bool uns;
8654 *multi_step_cvt = 0;
8655 switch (code)
8657 CASE_CONVERT:
8658 c1 = VEC_PACK_TRUNC_EXPR;
8659 break;
8661 case FIX_TRUNC_EXPR:
8662 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8663 break;
8665 case FLOAT_EXPR:
8666 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8667 tree code and optabs used for computing the operation. */
8668 return false;
8670 default:
8671 gcc_unreachable ();
8674 if (code == FIX_TRUNC_EXPR)
8675 /* The signedness is determined from output operand. */
8676 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8677 else
8678 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8680 if (!optab1)
8681 return false;
8683 vec_mode = TYPE_MODE (vectype);
8684 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8685 return false;
8687 *code1 = c1;
8689 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8690 return true;
8692 /* Check if it's a multi-step conversion that can be done using intermediate
8693 types. */
8694 prev_mode = vec_mode;
8695 if (code == FIX_TRUNC_EXPR)
8696 uns = TYPE_UNSIGNED (vectype_out);
8697 else
8698 uns = TYPE_UNSIGNED (vectype);
8700 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8701 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8702 costly than signed. */
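/* Illustrative example of the case above (not part of this file; names are
   made up):

     void trunc_store (unsigned short *dst, const double *src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = (unsigned short) src[i];   // FP -> unsigned narrowing
     }

   When such a conversion needs several steps, the code below first tries a
   signed fix-trunc for the initial step, since unsigned FP-to-integer
   conversions are often more expensive.  */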
8703 if (code == FIX_TRUNC_EXPR && uns)
8705 enum insn_code icode2;
8707 intermediate_type
8708 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8709 interm_optab
8710 = optab_for_tree_code (c1, intermediate_type, optab_default);
8711 if (interm_optab != unknown_optab
8712 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8713 && insn_data[icode1].operand[0].mode
8714 == insn_data[icode2].operand[0].mode)
8716 uns = false;
8717 optab1 = interm_optab;
8718 icode1 = icode2;
8722 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8723 intermediate steps in the narrowing sequence. We try
8724 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8725 interm_types->create (MAX_INTERM_CVT_STEPS);
8726 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8728 intermediate_mode = insn_data[icode1].operand[0].mode;
8729 intermediate_type
8730 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8731 interm_optab
8732 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8733 optab_default);
8734 if (!interm_optab
8735 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8736 || insn_data[icode1].operand[0].mode != intermediate_mode
8737 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8738 == CODE_FOR_nothing))
8739 break;
8741 interm_types->quick_push (intermediate_type);
8742 (*multi_step_cvt)++;
8744 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8745 return true;
8747 prev_mode = intermediate_mode;
8748 optab1 = interm_optab;
8751 interm_types->release ();
8752 return false;