Remove assert in get_def_bb_for_const
[official-gcc.git] / gcc / tree-vect-stmts.c
blob d2e16d0b9296aa422b2158f037d4dfdd25e5000b
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
57 tree
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
65 bool
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
71 struct loop* loop;
73 if (!loop_vinfo)
74 return false;
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
85 unsigned
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
90 if (body_cost_vec)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
95 misalign };
96 body_cost_vec->safe_push (si);
97 return (unsigned)
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
100 else
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple *new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple *new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
184 enum vect_relevant relevant, bool live_p)
186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
187 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
188 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
189 gimple *pattern_stmt;
191 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE, vect_location,
194 "mark relevant %d, live %d: ", relevant, live_p);
195 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
198 /* If this stmt is an original stmt in a pattern, we might need to mark its
199 related pattern stmt instead of the original stmt. However, such stmts
200 may have their own uses that are not in any pattern; in such cases the
201 stmt itself should be marked. */
202 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
204 /* This is the last stmt in a sequence that was detected as a
205 pattern that can potentially be vectorized. Don't mark the stmt
206 as relevant/live because it's not going to be vectorized.
207 Instead mark the pattern-stmt that replaces it. */
209 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
211 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE, vect_location,
213 "last stmt in pattern. don't mark"
214 " relevant/live.\n");
215 stmt_info = vinfo_for_stmt (pattern_stmt);
216 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
217 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
218 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
219 stmt = pattern_stmt;
222 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
223 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
224 STMT_VINFO_RELEVANT (stmt_info) = relevant;
226 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
227 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
229 if (dump_enabled_p ())
230 dump_printf_loc (MSG_NOTE, vect_location,
231 "already marked relevant/live.\n");
232 return;
235 worklist->safe_push (stmt);
239 /* Function vect_stmt_relevant_p.
241 Return true if STMT in loop that is represented by LOOP_VINFO is
242 "relevant for vectorization".
244 A stmt is considered "relevant for vectorization" if:
245 - it has uses outside the loop.
246 - it has vdefs (it alters memory).
247 - it is a control stmt in the loop (other than the loop exit condition).
249 CHECKME: what other side effects would the vectorizer allow? */
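/* For example, in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the store to a[i] has a vdef and is therefore relevant, whereas a
   statement computing only the array index is neither relevant nor
   live by the criteria above.  */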
251 static bool
252 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
253 enum vect_relevant *relevant, bool *live_p)
255 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
256 ssa_op_iter op_iter;
257 imm_use_iterator imm_iter;
258 use_operand_p use_p;
259 def_operand_p def_p;
261 *relevant = vect_unused_in_scope;
262 *live_p = false;
264 /* cond stmt other than loop exit cond. */
265 if (is_ctrl_stmt (stmt)
266 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
267 != loop_exit_ctrl_vec_info_type)
268 *relevant = vect_used_in_scope;
270 /* changing memory. */
271 if (gimple_code (stmt) != GIMPLE_PHI)
272 if (gimple_vdef (stmt)
273 && !gimple_clobber_p (stmt))
275 if (dump_enabled_p ())
276 dump_printf_loc (MSG_NOTE, vect_location,
277 "vec_stmt_relevant_p: stmt has vdefs.\n");
278 *relevant = vect_used_in_scope;
281 /* uses outside the loop. */
282 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
284 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
286 basic_block bb = gimple_bb (USE_STMT (use_p));
287 if (!flow_bb_inside_loop_p (loop, bb))
289 if (dump_enabled_p ())
290 dump_printf_loc (MSG_NOTE, vect_location,
291 "vec_stmt_relevant_p: used out of loop.\n");
293 if (is_gimple_debug (USE_STMT (use_p)))
294 continue;
296 /* We expect all such uses to be in the loop exit phis
297 (because of loop-closed SSA form). */
298 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
299 gcc_assert (bb == single_exit (loop)->dest);
301 *live_p = true;
306 return (*live_p || *relevant);
310 /* Function exist_non_indexing_operands_for_use_p
312 USE is one of the uses attached to STMT. Check if USE is
313 used in STMT for anything other than indexing an array. */
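/* For example, given 'a[i_1] = x_2', the use of x_2 is a non-indexing
   operand (it is the value being stored), whereas a use of i_1 only
   feeds the address computation.  Likewise, for the IFN_MASK_LOAD and
   IFN_MASK_STORE calls handled below, only the mask and (for stores)
   the stored value are non-indexing uses.  */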
315 static bool
316 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
318 tree operand;
319 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
321 /* USE corresponds to some operand in STMT. If there is no data
322 reference in STMT, then any operand that corresponds to USE
323 is not indexing an array. */
324 if (!STMT_VINFO_DATA_REF (stmt_info))
325 return true;
327 /* STMT has a data_ref. FORNOW this means that it's of one of
328 the following forms:
329 -1- ARRAY_REF = var
330 -2- var = ARRAY_REF
331 (This should have been verified in analyze_data_refs).
333 'var' in the second case corresponds to a def, not a use,
334 so USE cannot correspond to any operands that are not used
335 for array indexing.
337 Therefore, all we need to check is if STMT falls into the
338 first case, and whether var corresponds to USE. */
340 if (!gimple_assign_copy_p (stmt))
342 if (is_gimple_call (stmt)
343 && gimple_call_internal_p (stmt))
344 switch (gimple_call_internal_fn (stmt))
346 case IFN_MASK_STORE:
347 operand = gimple_call_arg (stmt, 3);
348 if (operand == use)
349 return true;
350 /* FALLTHRU */
351 case IFN_MASK_LOAD:
352 operand = gimple_call_arg (stmt, 2);
353 if (operand == use)
354 return true;
355 break;
356 default:
357 break;
359 return false;
362 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
363 return false;
364 operand = gimple_assign_rhs1 (stmt);
365 if (TREE_CODE (operand) != SSA_NAME)
366 return false;
368 if (operand == use)
369 return true;
371 return false;
376 /* Function process_use.
378 Inputs:
379 - a USE in STMT in a loop represented by LOOP_VINFO
380 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
381 that defined USE. This is done by calling mark_relevant and passing it
382 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
383 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
384 be performed.
386 Outputs:
387 Generally, LIVE_P and RELEVANT are used to define the liveness and
388 relevance info of the DEF_STMT of this USE:
389 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
390 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
391 Exceptions:
392 - case 1: If USE is used only for address computations (e.g. array indexing),
393 which does not need to be directly vectorized, then the liveness/relevance
394 of the respective DEF_STMT is left unchanged.
395 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
396 skip DEF_STMT because it has already been processed.
397 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
398 be modified accordingly.
400 Return true if everything is as expected. Return false otherwise. */
402 static bool
403 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
404 enum vect_relevant relevant, vec<gimple *> *worklist,
405 bool force)
407 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
408 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
409 stmt_vec_info dstmt_vinfo;
410 basic_block bb, def_bb;
411 gimple *def_stmt;
412 enum vect_def_type dt;
414 /* case 1: we are only interested in uses that need to be vectorized. Uses
415 that are used for address computation are not considered relevant. */
416 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
417 return true;
419 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
421 if (dump_enabled_p ())
422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
423 "not vectorized: unsupported use in stmt.\n");
424 return false;
427 if (!def_stmt || gimple_nop_p (def_stmt))
428 return true;
430 def_bb = gimple_bb (def_stmt);
431 if (!flow_bb_inside_loop_p (loop, def_bb))
433 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
435 return true;
438 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
439 DEF_STMT must have already been processed, because this should be the
440 only way that STMT, which is a reduction-phi, was put in the worklist,
441 as there should be no other uses for DEF_STMT in the loop. So we just
442 check that everything is as expected, and we are done. */
443 dstmt_vinfo = vinfo_for_stmt (def_stmt);
444 bb = gimple_bb (stmt);
445 if (gimple_code (stmt) == GIMPLE_PHI
446 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
447 && gimple_code (def_stmt) != GIMPLE_PHI
448 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
449 && bb->loop_father == def_bb->loop_father)
451 if (dump_enabled_p ())
452 dump_printf_loc (MSG_NOTE, vect_location,
453 "reduc-stmt defining reduc-phi in the same nest.\n");
454 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
455 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
456 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
457 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
458 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
459 return true;
462 /* case 3a: outer-loop stmt defining an inner-loop stmt:
463 outer-loop-header-bb:
464 d = def_stmt
465 inner-loop:
466 stmt # use (d)
467 outer-loop-tail-bb:
468 ... */
469 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_NOTE, vect_location,
473 "outer-loop def-stmt defining inner-loop stmt.\n");
475 switch (relevant)
477 case vect_unused_in_scope:
478 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
479 vect_used_in_scope : vect_unused_in_scope;
480 break;
482 case vect_used_in_outer_by_reduction:
483 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
484 relevant = vect_used_by_reduction;
485 break;
487 case vect_used_in_outer:
488 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
489 relevant = vect_used_in_scope;
490 break;
492 case vect_used_in_scope:
493 break;
495 default:
496 gcc_unreachable ();
500 /* case 3b: inner-loop stmt defining an outer-loop stmt:
501 outer-loop-header-bb:
503 inner-loop:
504 d = def_stmt
505 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
506 stmt # use (d) */
507 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
509 if (dump_enabled_p ())
510 dump_printf_loc (MSG_NOTE, vect_location,
511 "inner-loop def-stmt defining outer-loop stmt.\n");
513 switch (relevant)
515 case vect_unused_in_scope:
516 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
517 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
518 vect_used_in_outer_by_reduction : vect_unused_in_scope;
519 break;
521 case vect_used_by_reduction:
522 relevant = vect_used_in_outer_by_reduction;
523 break;
525 case vect_used_in_scope:
526 relevant = vect_used_in_outer;
527 break;
529 default:
530 gcc_unreachable ();
534 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
535 return true;
539 /* Function vect_mark_stmts_to_be_vectorized.
541 Not all stmts in the loop need to be vectorized. For example:
543 for i...
544 for j...
545 1. T0 = i + j
546 2. T1 = a[T0]
548 3. j = j + 1
550 Stmts 1 and 3 do not need to be vectorized, because loop control and
551 addressing of vectorized data-refs are handled differently.
553 This pass detects such stmts. */
555 bool
556 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
558 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
559 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
560 unsigned int nbbs = loop->num_nodes;
561 gimple_stmt_iterator si;
562 gimple *stmt;
563 unsigned int i;
564 stmt_vec_info stmt_vinfo;
565 basic_block bb;
566 gimple *phi;
567 bool live_p;
568 enum vect_relevant relevant, tmp_relevant;
569 enum vect_def_type def_type;
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location,
573 "=== vect_mark_stmts_to_be_vectorized ===\n");
575 auto_vec<gimple *, 64> worklist;
577 /* 1. Init worklist. */
578 for (i = 0; i < nbbs; i++)
580 bb = bbs[i];
581 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
583 phi = gsi_stmt (si);
584 if (dump_enabled_p ())
586 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
587 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
590 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
591 vect_mark_relevant (&worklist, phi, relevant, live_p);
593 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
595 stmt = gsi_stmt (si);
596 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
602 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
603 vect_mark_relevant (&worklist, stmt, relevant, live_p);
607 /* 2. Process_worklist */
608 while (worklist.length () > 0)
610 use_operand_p use_p;
611 ssa_op_iter iter;
613 stmt = worklist.pop ();
614 if (dump_enabled_p ())
616 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
617 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
620 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
621 (DEF_STMT) as relevant/irrelevant and live/dead according to the
622 liveness and relevance properties of STMT. */
623 stmt_vinfo = vinfo_for_stmt (stmt);
624 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
625 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
627 /* Generally, the liveness and relevance properties of STMT are
628 propagated as is to the DEF_STMTs of its USEs:
629 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
630 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
632 One exception is when STMT has been identified as defining a reduction
633 variable; in this case we set the liveness/relevance as follows:
634 live_p = false
635 relevant = vect_used_by_reduction
636 This is because we distinguish between two kinds of relevant stmts -
637 those that are used by a reduction computation, and those that are
638 (also) used by a regular computation. This allows us later on to
639 identify stmts that are used solely by a reduction, and therefore the
640 order of the results that they produce does not have to be kept. */
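/* For example, for a reduction 's = s + a[i]' the statements that feed
   only the reduction chain are marked vect_used_by_reduction, so the
   order of the partial results they produce need not be preserved.  */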
642 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
643 tmp_relevant = relevant;
644 switch (def_type)
646 case vect_reduction_def:
647 switch (tmp_relevant)
649 case vect_unused_in_scope:
650 relevant = vect_used_by_reduction;
651 break;
653 case vect_used_by_reduction:
654 if (gimple_code (stmt) == GIMPLE_PHI)
655 break;
656 /* fall through */
658 default:
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
661 "unsupported use of reduction.\n");
662 return false;
665 live_p = false;
666 break;
668 case vect_nested_cycle:
669 if (tmp_relevant != vect_unused_in_scope
670 && tmp_relevant != vect_used_in_outer_by_reduction
671 && tmp_relevant != vect_used_in_outer)
673 if (dump_enabled_p ())
674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
675 "unsupported use of nested cycle.\n");
677 return false;
680 live_p = false;
681 break;
683 case vect_double_reduction_def:
684 if (tmp_relevant != vect_unused_in_scope
685 && tmp_relevant != vect_used_by_reduction)
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "unsupported use of double reduction.\n");
691 return false;
694 live_p = false;
695 break;
697 default:
698 break;
701 if (is_pattern_stmt_p (stmt_vinfo))
703 /* Pattern statements are not inserted into the code, so
704 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
705 have to scan the RHS or function arguments instead. */
706 if (is_gimple_assign (stmt))
708 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
709 tree op = gimple_assign_rhs1 (stmt);
711 i = 1;
712 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
714 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
715 live_p, relevant, &worklist, false)
716 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
717 live_p, relevant, &worklist, false))
718 return false;
719 i = 2;
721 for (; i < gimple_num_ops (stmt); i++)
723 op = gimple_op (stmt, i);
724 if (TREE_CODE (op) == SSA_NAME
725 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
726 &worklist, false))
727 return false;
730 else if (is_gimple_call (stmt))
732 for (i = 0; i < gimple_call_num_args (stmt); i++)
734 tree arg = gimple_call_arg (stmt, i);
735 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
736 &worklist, false))
737 return false;
741 else
742 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
744 tree op = USE_FROM_PTR (use_p);
745 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
746 &worklist, false))
747 return false;
750 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
752 tree off;
753 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
754 gcc_assert (decl);
755 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
756 &worklist, true))
757 return false;
759 } /* while worklist */
761 return true;
765 /* Function vect_model_simple_cost.
767 Models cost for simple operations, i.e. those that only emit ncopies of a
768 single op. Right now, this does not account for multiple insns that could
769 be generated for the single vector op. We will handle that shortly. */
771 void
772 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
773 enum vect_def_type *dt,
774 stmt_vector_for_cost *prologue_cost_vec,
775 stmt_vector_for_cost *body_cost_vec)
777 int i;
778 int inside_cost = 0, prologue_cost = 0;
780 /* The SLP costs were already calculated during SLP tree build. */
781 if (PURE_SLP_STMT (stmt_info))
782 return;
784 /* FORNOW: Assuming a maximum of 2 args per stmt. */
785 for (i = 0; i < 2; i++)
786 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
787 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
788 stmt_info, 0, vect_prologue);
790 /* Pass the inside-of-loop statements to the target-specific cost model. */
791 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
792 stmt_info, 0, vect_body);
794 if (dump_enabled_p ())
795 dump_printf_loc (MSG_NOTE, vect_location,
796 "vect_model_simple_cost: inside_cost = %d, "
797 "prologue_cost = %d .\n", inside_cost, prologue_cost);
801 /* Model cost for type demotion and promotion operations. PWR is normally
802 zero for single-step promotions and demotions. It will be one if
803 two-step promotion/demotion is required, and so on. Each additional
804 step doubles the number of instructions required. */
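/* As a worked example of the accounting below: promoting a vector of
   chars all the way to ints takes two steps, so PWR == 1 and the loop
   charges vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote
   operations; the corresponding two-step demotion charges 1 + 2.  */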
806 static void
807 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
808 enum vect_def_type *dt, int pwr)
810 int i, tmp;
811 int inside_cost = 0, prologue_cost = 0;
812 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
813 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
814 void *target_cost_data;
816 /* The SLP costs were already calculated during SLP tree build. */
817 if (PURE_SLP_STMT (stmt_info))
818 return;
820 if (loop_vinfo)
821 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
822 else
823 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
825 for (i = 0; i < pwr + 1; i++)
827 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
828 (i + 1) : i;
829 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
830 vec_promote_demote, stmt_info, 0,
831 vect_body);
834 /* FORNOW: Assuming a maximum of 2 args per stmt. */
835 for (i = 0; i < 2; i++)
836 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
837 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
838 stmt_info, 0, vect_prologue);
840 if (dump_enabled_p ())
841 dump_printf_loc (MSG_NOTE, vect_location,
842 "vect_model_promotion_demotion_cost: inside_cost = %d, "
843 "prologue_cost = %d .\n", inside_cost, prologue_cost);
846 /* Function vect_cost_group_size
848 For grouped load or store, return the group_size only if it is the first
849 load or store of a group, else return 1. This ensures that group size is
850 only returned once per group. */
852 static int
853 vect_cost_group_size (stmt_vec_info stmt_info)
855 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
857 if (first_stmt == STMT_VINFO_STMT (stmt_info))
858 return GROUP_SIZE (stmt_info);
860 return 1;
864 /* Function vect_model_store_cost
866 Models cost for stores. In the case of grouped accesses, one access
867 has the overhead of the grouped access attributed to it. */
869 void
870 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
871 bool store_lanes_p, enum vect_def_type dt,
872 slp_tree slp_node,
873 stmt_vector_for_cost *prologue_cost_vec,
874 stmt_vector_for_cost *body_cost_vec)
876 int group_size;
877 unsigned int inside_cost = 0, prologue_cost = 0;
878 struct data_reference *first_dr;
879 gimple *first_stmt;
881 if (dt == vect_constant_def || dt == vect_external_def)
882 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
883 stmt_info, 0, vect_prologue);
885 /* Grouped access? */
886 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
888 if (slp_node)
890 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
891 group_size = 1;
893 else
895 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
896 group_size = vect_cost_group_size (stmt_info);
899 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
901 /* Not a grouped access. */
902 else
904 group_size = 1;
905 first_dr = STMT_VINFO_DATA_REF (stmt_info);
908 /* We assume that the cost of a single store-lanes instruction is
909 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
910 access is instead being provided by a permute-and-store operation,
911 include the cost of the permutes. */
912 if (!store_lanes_p && group_size > 1
913 && !STMT_VINFO_STRIDED_P (stmt_info))
915 /* Uses high and low interleave or shuffle operations for each
916 needed permute. */
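/* E.g. for ncopies == 1 and group_size == 4 this amounts to
   1 * ceil_log2 (4) * 4 == 8 vec_perm statements.  */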
917 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
918 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
919 stmt_info, 0, vect_body);
921 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE, vect_location,
923 "vect_model_store_cost: strided group_size = %d .\n",
924 group_size);
927 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928 /* Costs of the stores. */
929 if (STMT_VINFO_STRIDED_P (stmt_info)
930 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
932 /* N scalar stores plus extracting the elements. */
933 inside_cost += record_stmt_cost (body_cost_vec,
934 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
935 scalar_store, stmt_info, 0, vect_body);
937 else
938 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
940 if (STMT_VINFO_STRIDED_P (stmt_info))
941 inside_cost += record_stmt_cost (body_cost_vec,
942 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
943 vec_to_scalar, stmt_info, 0, vect_body);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_store_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
952 /* Calculate cost of DR's memory access. */
953 void
954 vect_get_store_cost (struct data_reference *dr, int ncopies,
955 unsigned int *inside_cost,
956 stmt_vector_for_cost *body_cost_vec)
958 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
959 gimple *stmt = DR_STMT (dr);
960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 DR_MISALIGNMENT (dr), vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1005 /* Function vect_model_load_cost
1007 Models cost for loads. In the case of grouped accesses, the last access
1008 has the overhead of the grouped access attributed to it. Since unaligned
1009 accesses are supported for loads, we also account for the costs of the
1010 access scheme chosen. */
1012 void
1013 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1014 bool load_lanes_p, slp_tree slp_node,
1015 stmt_vector_for_cost *prologue_cost_vec,
1016 stmt_vector_for_cost *body_cost_vec)
1018 int group_size;
1019 gimple *first_stmt;
1020 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1021 unsigned int inside_cost = 0, prologue_cost = 0;
1023 /* Grouped accesses? */
1024 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1025 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1027 group_size = vect_cost_group_size (stmt_info);
1028 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1030 /* Not a grouped access. */
1031 else
1033 group_size = 1;
1034 first_dr = dr;
1037 /* We assume that the cost of a single load-lanes instruction is
1038 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1039 access is instead being provided by a load-and-permute operation,
1040 include the cost of the permutes. */
1041 if (!load_lanes_p && group_size > 1
1042 && !STMT_VINFO_STRIDED_P (stmt_info))
1044 /* Uses even and odd extract operations or shuffle operations
1045 for each needed permute. */
1046 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1047 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1048 stmt_info, 0, vect_body);
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE, vect_location,
1052 "vect_model_load_cost: strided group_size = %d .\n",
1053 group_size);
1056 /* The loads themselves. */
1057 if (STMT_VINFO_STRIDED_P (stmt_info)
1058 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1060 /* N scalar loads plus gathering them into a vector. */
1061 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1062 inside_cost += record_stmt_cost (body_cost_vec,
1063 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1064 scalar_load, stmt_info, 0, vect_body);
1066 else
1067 vect_get_load_cost (first_dr, ncopies,
1068 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1069 || group_size > 1 || slp_node),
1070 &inside_cost, &prologue_cost,
1071 prologue_cost_vec, body_cost_vec, true);
1072 if (STMT_VINFO_STRIDED_P (stmt_info))
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1084 void
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1098 case dr_aligned:
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1107 break;
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1119 "hardware.\n");
1121 break;
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1132 prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1141 break;
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1148 "pipelined.\n");
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1161 0, vect_prologue);
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1165 0, vect_prologue);
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1176 "\n");
1178 break;
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1188 break;
1191 default:
1192 gcc_unreachable ();
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1199 static void
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 if (gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1204 else
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1209 if (loop_vinfo)
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 basic_block new_bb;
1213 edge pe;
1215 if (nested_in_vect_loop_p (loop, stmt))
1216 loop = loop->inner;
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1222 else
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 basic_block bb;
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 a vector type, a vector with all elements equal to VAL is created first.
1248 Place the initialization at GSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
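/* For example, initializing a V4SI vector from the integer constant 5
   yields an init stmt of the form 'cst_1 = { 5, 5, 5, 5 };'.  For a
   boolean vector TYPE, a scalar VAL is first canonicalized to an
   all-zeros or all-ones element, as done below.  */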
1253 tree
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 gimple *init_stmt;
1257 tree new_temp;
1259 /* We abuse this function to push something to an SSA name with the initial value 'val'. */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1265 /* A scalar boolean value should be transformed into an
1266 all-zeros or all-ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1274 else
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1280 val = new_temp;
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1285 else
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1291 TREE_TYPE (type),
1292 val));
1293 else
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1296 val = new_temp;
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 return new_temp;
1309 /* Function vect_get_vec_def_for_operand.
1311 OP is an operand in STMT. This function returns a (vector) def that will be
1312 used in the vectorized stmt for STMT.
1314 In the case that OP is an SSA_NAME which is defined in the loop, then
1315 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1317 In case OP is an invariant or constant, a new stmt that creates a vector def
1318 needs to be introduced. VECTYPE may be used to specify a required type for
1319 vector invariant. */
1321 tree
1322 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1324 tree vec_oprnd;
1325 gimple *vec_stmt;
1326 gimple *def_stmt;
1327 stmt_vec_info def_stmt_info = NULL;
1328 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1329 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1330 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1331 enum vect_def_type dt;
1332 bool is_simple_use;
1333 tree vector_type;
1335 if (dump_enabled_p ())
1337 dump_printf_loc (MSG_NOTE, vect_location,
1338 "vect_get_vec_def_for_operand: ");
1339 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1340 dump_printf (MSG_NOTE, "\n");
1343 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1344 gcc_assert (is_simple_use);
1345 if (dump_enabled_p ())
1347 int loc_printed = 0;
1348 if (def_stmt)
1350 if (loc_printed)
1351 dump_printf (MSG_NOTE, " def_stmt = ");
1352 else
1353 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1354 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1358 switch (dt)
1360 /* operand is a constant or a loop invariant. */
1361 case vect_constant_def:
1362 case vect_external_def:
1364 if (vectype)
1365 vector_type = vectype;
1366 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1367 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1368 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1369 else
1370 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1372 gcc_assert (vector_type);
1373 return vect_init_vector (stmt, op, vector_type, NULL);
1376 /* operand is defined inside the loop. */
1377 case vect_internal_def:
1379 /* Get the def from the vectorized stmt. */
1380 def_stmt_info = vinfo_for_stmt (def_stmt);
1382 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1383 /* Get vectorized pattern statement. */
1384 if (!vec_stmt
1385 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1386 && !STMT_VINFO_RELEVANT (def_stmt_info))
1387 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1388 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1389 gcc_assert (vec_stmt);
1390 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 vec_oprnd = PHI_RESULT (vec_stmt);
1392 else if (is_gimple_call (vec_stmt))
1393 vec_oprnd = gimple_call_lhs (vec_stmt);
1394 else
1395 vec_oprnd = gimple_assign_lhs (vec_stmt);
1396 return vec_oprnd;
1399 /* operand is defined by a loop header phi - reduction */
1400 case vect_reduction_def:
1401 case vect_double_reduction_def:
1402 case vect_nested_cycle:
1403 /* Code should use get_initial_def_for_reduction. */
1404 gcc_unreachable ();
1406 /* operand is defined by loop-header phi - induction. */
1407 case vect_induction_def:
1409 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1411 /* Get the def from the vectorized stmt. */
1412 def_stmt_info = vinfo_for_stmt (def_stmt);
1413 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1414 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1415 vec_oprnd = PHI_RESULT (vec_stmt);
1416 else
1417 vec_oprnd = gimple_get_lhs (vec_stmt);
1418 return vec_oprnd;
1421 default:
1422 gcc_unreachable ();
1427 /* Function vect_get_vec_def_for_stmt_copy
1429 Return a vector-def for an operand. This function is used when the
1430 vectorized stmt to be created (by the caller to this function) is a "copy"
1431 created in case the vectorized result cannot fit in one vector, and several
1432 copies of the vector-stmt are required. In this case the vector-def is
1433 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1434 of the stmt that defines VEC_OPRND.
1435 DT is the type of the vector def VEC_OPRND.
1437 Context:
1438 In case the vectorization factor (VF) is bigger than the number
1439 of elements that can fit in a vectype (nunits), we have to generate
1440 more than one vector stmt to vectorize the scalar stmt. This situation
1441 arises when there are multiple data-types operated upon in the loop; the
1442 smallest data-type determines the VF, and as a result, when vectorizing
1443 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1444 vector stmt (each computing a vector of 'nunits' results, and together
1445 computing 'VF' results in each iteration). This function is called when
1446 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1447 which VF=16 and nunits=4, so the number of copies required is 4):
1449 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1451 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1452 VS1.1: vx.1 = memref1 VS1.2
1453 VS1.2: vx.2 = memref2 VS1.3
1454 VS1.3: vx.3 = memref3
1456 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1457 VSnew.1: vz1 = vx.1 + ... VSnew.2
1458 VSnew.2: vz2 = vx.2 + ... VSnew.3
1459 VSnew.3: vz3 = vx.3 + ...
1461 The vectorization of S1 is explained in vectorizable_load.
1462 The vectorization of S2:
1463 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1464 the function 'vect_get_vec_def_for_operand' is called to
1465 get the relevant vector-def for each operand of S2. For operand x it
1466 returns the vector-def 'vx.0'.
1468 To create the remaining copies of the vector-stmt (VSnew.j), this
1469 function is called to get the relevant vector-def for each operand. It is
1470 obtained from the respective VS1.j stmt, which is recorded in the
1471 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1473 For example, to obtain the vector-def 'vx.1' in order to create the
1474 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1475 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1476 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1477 and return its def ('vx.1').
1478 Overall, to create the above sequence this function will be called 3 times:
1479 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1480 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1481 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1483 tree
1484 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1486 gimple *vec_stmt_for_operand;
1487 stmt_vec_info def_stmt_info;
1489 /* Do nothing; can reuse same def. */
1490 if (dt == vect_external_def || dt == vect_constant_def )
1491 return vec_oprnd;
1493 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1494 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1495 gcc_assert (def_stmt_info);
1496 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1497 gcc_assert (vec_stmt_for_operand);
1498 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1499 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1500 else
1501 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1502 return vec_oprnd;
1506 /* Get vectorized definitions for the operands to create a copy of an original
1507 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1509 static void
1510 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1511 vec<tree> *vec_oprnds0,
1512 vec<tree> *vec_oprnds1)
1514 tree vec_oprnd = vec_oprnds0->pop ();
1516 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1517 vec_oprnds0->quick_push (vec_oprnd);
1519 if (vec_oprnds1 && vec_oprnds1->length ())
1521 vec_oprnd = vec_oprnds1->pop ();
1522 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1523 vec_oprnds1->quick_push (vec_oprnd);
1528 /* Get vectorized definitions for OP0 and OP1.
1529 REDUC_INDEX is the index of reduction operand in case of reduction,
1530 and -1 otherwise. */
1532 void
1533 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1534 vec<tree> *vec_oprnds0,
1535 vec<tree> *vec_oprnds1,
1536 slp_tree slp_node, int reduc_index)
1538 if (slp_node)
1540 int nops = (op1 == NULL_TREE) ? 1 : 2;
1541 auto_vec<tree> ops (nops);
1542 auto_vec<vec<tree> > vec_defs (nops);
1544 ops.quick_push (op0);
1545 if (op1)
1546 ops.quick_push (op1);
1548 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1550 *vec_oprnds0 = vec_defs[0];
1551 if (op1)
1552 *vec_oprnds1 = vec_defs[1];
1554 else
1556 tree vec_oprnd;
1558 vec_oprnds0->create (1);
1559 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1560 vec_oprnds0->quick_push (vec_oprnd);
1562 if (op1)
1564 vec_oprnds1->create (1);
1565 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1566 vec_oprnds1->quick_push (vec_oprnd);
1572 /* Function vect_finish_stmt_generation.
1574 Insert a new stmt. */
1576 void
1577 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1578 gimple_stmt_iterator *gsi)
1580 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1581 vec_info *vinfo = stmt_info->vinfo;
1583 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1585 if (!gsi_end_p (*gsi)
1586 && gimple_has_mem_ops (vec_stmt))
1588 gimple *at_stmt = gsi_stmt (*gsi);
1589 tree vuse = gimple_vuse (at_stmt);
1590 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1592 tree vdef = gimple_vdef (at_stmt);
1593 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1594 /* If we have an SSA vuse and insert a store, update virtual
1595 SSA form to avoid triggering the renamer. Do so only
1596 if we can easily see all uses - which is what almost always
1597 happens with the way vectorized stmts are inserted. */
1598 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1599 && ((is_gimple_assign (vec_stmt)
1600 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1601 || (is_gimple_call (vec_stmt)
1602 && !(gimple_call_flags (vec_stmt)
1603 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1605 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1606 gimple_set_vdef (vec_stmt, new_vdef);
1607 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1611 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1613 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1615 if (dump_enabled_p ())
1617 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1621 gimple_set_location (vec_stmt, gimple_location (stmt));
1623 /* While EH edges will generally prevent vectorization, stmt might
1624 e.g. be in a must-not-throw region. Ensure newly created stmts
1625 that could throw are part of the same region. */
1626 int lp_nr = lookup_stmt_eh_lp (stmt);
1627 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1631 /* We want to vectorize a call to combined function CFN with function
1632 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1633 as the types of all inputs. Check whether this is possible using
1634 an internal function, returning its code if so or IFN_LAST if not. */
1636 static internal_fn
1637 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1638 tree vectype_out, tree vectype_in)
1640 internal_fn ifn;
1641 if (internal_fn_p (cfn))
1642 ifn = as_internal_fn (cfn);
1643 else
1644 ifn = associated_internal_fn (fndecl);
1645 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1647 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1648 if (info.vectorizable)
1650 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1651 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1652 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1653 OPTIMIZE_FOR_SPEED))
1654 return ifn;
1657 return IFN_LAST;
1661 static tree permute_vec_elements (tree, tree, tree, gimple *,
1662 gimple_stmt_iterator *);
1665 /* Function vectorizable_mask_load_store.
1667 Check if STMT performs a conditional load or store that can be vectorized.
1668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1669 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1670 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
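/* Such calls are typically created by if-conversion, e.g. a loop body
   'if (c[i]) a[i] = x;' is turned into an IFN_MASK_STORE whose mask is
   the vectorized comparison result.  */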
1672 static bool
1673 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1674 gimple **vec_stmt, slp_tree slp_node)
1676 tree vec_dest = NULL;
1677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678 stmt_vec_info prev_stmt_info;
1679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1680 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1681 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1682 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1683 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1684 tree rhs_vectype = NULL_TREE;
1685 tree mask_vectype;
1686 tree elem_type;
1687 gimple *new_stmt;
1688 tree dummy;
1689 tree dataref_ptr = NULL_TREE;
1690 gimple *ptr_incr;
1691 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1692 int ncopies;
1693 int i, j;
1694 bool inv_p;
1695 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1696 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1697 int gather_scale = 1;
1698 enum vect_def_type gather_dt = vect_unknown_def_type;
1699 bool is_store;
1700 tree mask;
1701 gimple *def_stmt;
1702 enum vect_def_type dt;
1704 if (slp_node != NULL)
1705 return false;
1707 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1708 gcc_assert (ncopies >= 1);
1710 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1711 mask = gimple_call_arg (stmt, 2);
1713 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1714 return false;
1716 /* FORNOW. This restriction should be relaxed. */
1717 if (nested_in_vect_loop && ncopies > 1)
1719 if (dump_enabled_p ())
1720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1721 "multiple types in nested loop.");
1722 return false;
1725 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1726 return false;
1728 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1729 && ! vec_stmt)
1730 return false;
1732 if (!STMT_VINFO_DATA_REF (stmt_info))
1733 return false;
1735 elem_type = TREE_TYPE (vectype);
1737 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1738 return false;
1740 if (STMT_VINFO_STRIDED_P (stmt_info))
1741 return false;
1743 if (TREE_CODE (mask) != SSA_NAME)
1744 return false;
1746 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1747 return false;
1749 if (!mask_vectype)
1750 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1752 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
1753 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
1754 return false;
1756 if (is_store)
1758 tree rhs = gimple_call_arg (stmt, 3);
1759 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1760 return false;
1763 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1765 gimple *def_stmt;
1766 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1767 &gather_off, &gather_scale);
1768 gcc_assert (gather_decl);
1769 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1770 &gather_off_vectype))
1772 if (dump_enabled_p ())
1773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1774 "gather index use not simple.");
1775 return false;
1778 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1779 tree masktype
1780 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1781 if (TREE_CODE (masktype) == INTEGER_TYPE)
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1785 "masked gather with integer mask not supported.");
1786 return false;
1789 else if (tree_int_cst_compare (nested_in_vect_loop
1790 ? STMT_VINFO_DR_STEP (stmt_info)
1791 : DR_STEP (dr), size_zero_node) <= 0)
1792 return false;
1793 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1794 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1795 TYPE_MODE (mask_vectype),
1796 !is_store)
1797 || (rhs_vectype
1798 && !useless_type_conversion_p (vectype, rhs_vectype)))
1799 return false;
1801 if (!vec_stmt) /* transformation not required. */
1803 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1804 if (is_store)
1805 vect_model_store_cost (stmt_info, ncopies, false, dt,
1806 NULL, NULL, NULL);
1807 else
1808 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1809 return true;
1812 /** Transform. **/
1814 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1816 tree vec_oprnd0 = NULL_TREE, op;
1817 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1818 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1819 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1820 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1821 tree mask_perm_mask = NULL_TREE;
1822 edge pe = loop_preheader_edge (loop);
1823 gimple_seq seq;
1824 basic_block new_bb;
1825 enum { NARROW, NONE, WIDEN } modifier;
1826 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1828 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1829 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1830 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1831 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1832 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1833 scaletype = TREE_VALUE (arglist);
1834 gcc_checking_assert (types_compatible_p (srctype, rettype)
1835 && types_compatible_p (srctype, masktype));
1837 if (nunits == gather_off_nunits)
1838 modifier = NONE;
1839 else if (nunits == gather_off_nunits / 2)
1841 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1842 modifier = WIDEN;
1844 for (i = 0; i < gather_off_nunits; ++i)
1845 sel[i] = i | nunits;
1847 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1849 else if (nunits == gather_off_nunits * 2)
1851 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1852 modifier = NARROW;
1854 for (i = 0; i < nunits; ++i)
1855 sel[i] = i < gather_off_nunits
1856 ? i : i + nunits - gather_off_nunits;
1858 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1859 ncopies *= 2;
1860 for (i = 0; i < nunits; ++i)
1861 sel[i] = i | gather_off_nunits;
1862 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1864 else
1865 gcc_unreachable ();
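 /* Editorial note (not from the original source): a rough sketch of how the
    modifier just chosen is used in the loop below.  For WIDEN the gather
    offset vector has twice as many elements as the data vector, so each
    offset vector serves two copies; odd copies first move its second half
    into place with PERM_MASK.  For NARROW the data vector has twice as many
    elements as the offset vector, so NCOPIES is doubled and every pair of
    gather results is combined into one data vector with PERM_MASK, the mask
    operand being permuted analogously with MASK_PERM_MASK.  */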
1867 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1869 ptr = fold_convert (ptrtype, gather_base);
1870 if (!is_gimple_min_invariant (ptr))
1872 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1873 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1874 gcc_assert (!new_bb);
1877 scale = build_int_cst (scaletype, gather_scale);
1879 prev_stmt_info = NULL;
1880 for (j = 0; j < ncopies; ++j)
1882 if (modifier == WIDEN && (j & 1))
1883 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1884 perm_mask, stmt, gsi);
1885 else if (j == 0)
1886 op = vec_oprnd0
1887 = vect_get_vec_def_for_operand (gather_off, stmt);
1888 else
1889 op = vec_oprnd0
1890 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1892 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1894 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1895 == TYPE_VECTOR_SUBPARTS (idxtype));
1896 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1897 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1898 new_stmt
1899 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1900 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1901 op = var;
1904 if (mask_perm_mask && (j & 1))
1905 mask_op = permute_vec_elements (mask_op, mask_op,
1906 mask_perm_mask, stmt, gsi);
1907 else
1909 if (j == 0)
1910 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1911 else
1913 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1914 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1917 mask_op = vec_mask;
1918 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1920 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1921 == TYPE_VECTOR_SUBPARTS (masktype));
1922 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1923 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1924 new_stmt
1925 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1926 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1927 mask_op = var;
1931 new_stmt
1932 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1933 scale);
1935 if (!useless_type_conversion_p (vectype, rettype))
1937 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1938 == TYPE_VECTOR_SUBPARTS (rettype));
1939 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1940 gimple_call_set_lhs (new_stmt, op);
1941 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1942 var = make_ssa_name (vec_dest);
1943 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1944 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1946 else
1948 var = make_ssa_name (vec_dest, new_stmt);
1949 gimple_call_set_lhs (new_stmt, var);
1952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1954 if (modifier == NARROW)
1956 if ((j & 1) == 0)
1958 prev_res = var;
1959 continue;
1961 var = permute_vec_elements (prev_res, var,
1962 perm_mask, stmt, gsi);
1963 new_stmt = SSA_NAME_DEF_STMT (var);
1966 if (prev_stmt_info == NULL)
1967 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1968 else
1969 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1970 prev_stmt_info = vinfo_for_stmt (new_stmt);
1973 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
1974 from the IL. */
1975 if (STMT_VINFO_RELATED_STMT (stmt_info))
1977 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1978 stmt_info = vinfo_for_stmt (stmt);
1980 tree lhs = gimple_call_lhs (stmt);
1981 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
1982 set_vinfo_for_stmt (new_stmt, stmt_info);
1983 set_vinfo_for_stmt (stmt, NULL);
1984 STMT_VINFO_STMT (stmt_info) = new_stmt;
1985 gsi_replace (gsi, new_stmt, true);
1986 return true;
1988 else if (is_store)
1990 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
1991 prev_stmt_info = NULL;
1992 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
1993 for (i = 0; i < ncopies; i++)
1995 unsigned align, misalign;
1997 if (i == 0)
1999 tree rhs = gimple_call_arg (stmt, 3);
2000 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2001 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
 2002 /* We should have caught mismatched types earlier. */
2003 gcc_assert (useless_type_conversion_p (vectype,
2004 TREE_TYPE (vec_rhs)));
2005 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2006 NULL_TREE, &dummy, gsi,
2007 &ptr_incr, false, &inv_p);
2008 gcc_assert (!inv_p);
2010 else
2012 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2013 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2014 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2015 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2016 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2017 TYPE_SIZE_UNIT (vectype));
2020 align = TYPE_ALIGN_UNIT (vectype);
2021 if (aligned_access_p (dr))
2022 misalign = 0;
2023 else if (DR_MISALIGNMENT (dr) == -1)
2025 align = TYPE_ALIGN_UNIT (elem_type);
2026 misalign = 0;
2028 else
2029 misalign = DR_MISALIGNMENT (dr);
2030 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2031 misalign);
2032 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2033 misalign ? misalign & -misalign : align);
2034 new_stmt
2035 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2036 ptr, vec_mask, vec_rhs);
2037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2038 if (i == 0)
2039 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2040 else
2041 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2042 prev_stmt_info = vinfo_for_stmt (new_stmt);
2045 else
2047 tree vec_mask = NULL_TREE;
2048 prev_stmt_info = NULL;
2049 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2050 for (i = 0; i < ncopies; i++)
2052 unsigned align, misalign;
2054 if (i == 0)
2056 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2057 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2058 NULL_TREE, &dummy, gsi,
2059 &ptr_incr, false, &inv_p);
2060 gcc_assert (!inv_p);
2062 else
2064 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2065 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2066 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2067 TYPE_SIZE_UNIT (vectype));
2070 align = TYPE_ALIGN_UNIT (vectype);
2071 if (aligned_access_p (dr))
2072 misalign = 0;
2073 else if (DR_MISALIGNMENT (dr) == -1)
2075 align = TYPE_ALIGN_UNIT (elem_type);
2076 misalign = 0;
2078 else
2079 misalign = DR_MISALIGNMENT (dr);
2080 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2081 misalign);
2082 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2083 misalign ? misalign & -misalign : align);
2084 new_stmt
2085 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2086 ptr, vec_mask);
2087 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2089 if (i == 0)
2090 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2091 else
2092 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2093 prev_stmt_info = vinfo_for_stmt (new_stmt);
2097 if (!is_store)
2099 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2100 from the IL. */
2101 if (STMT_VINFO_RELATED_STMT (stmt_info))
2103 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2104 stmt_info = vinfo_for_stmt (stmt);
2106 tree lhs = gimple_call_lhs (stmt);
2107 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2108 set_vinfo_for_stmt (new_stmt, stmt_info);
2109 set_vinfo_for_stmt (stmt, NULL);
2110 STMT_VINFO_STMT (stmt_info) = new_stmt;
2111 gsi_replace (gsi, new_stmt, true);
2114 return true;
2117 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2118 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2119 in a single step. On success, store the binary pack code in
2120 *CONVERT_CODE. */
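 /* Editorial example (an illustrative sketch, not from the original source):
    on a target that supports it, packing two V4SImode vectors into one
    V8HImode vector is a single VEC_PACK_TRUNC step and is accepted here,
    whereas a conversion that supportable_narrowing_operation can only do
    through an intermediate type (MULTI_STEP_CVT != 0) is rejected.  */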
2122 static bool
2123 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2124 tree_code *convert_code)
2126 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2127 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2128 return false;
2130 tree_code code;
2131 int multi_step_cvt = 0;
2132 auto_vec <tree, 8> interm_types;
2133 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2134 &code, &multi_step_cvt,
2135 &interm_types)
2136 || multi_step_cvt)
2137 return false;
2139 *convert_code = code;
2140 return true;
2143 /* Function vectorizable_call.
2145 Check if GS performs a function call that can be vectorized.
2146 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2147 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2148 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2150 static bool
2151 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2152 slp_tree slp_node)
2154 gcall *stmt;
2155 tree vec_dest;
2156 tree scalar_dest;
2157 tree op, type;
2158 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2159 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2160 tree vectype_out, vectype_in;
2161 int nunits_in;
2162 int nunits_out;
2163 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2164 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2165 vec_info *vinfo = stmt_info->vinfo;
2166 tree fndecl, new_temp, rhs_type;
2167 gimple *def_stmt;
2168 enum vect_def_type dt[3]
2169 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2170 gimple *new_stmt = NULL;
2171 int ncopies, j;
2172 vec<tree> vargs = vNULL;
2173 enum { NARROW, NONE, WIDEN } modifier;
2174 size_t i, nargs;
2175 tree lhs;
2177 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2178 return false;
2180 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2181 && ! vec_stmt)
2182 return false;
2184 /* Is GS a vectorizable call? */
2185 stmt = dyn_cast <gcall *> (gs);
2186 if (!stmt)
2187 return false;
2189 if (gimple_call_internal_p (stmt)
2190 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2191 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2192 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2193 slp_node);
2195 if (gimple_call_lhs (stmt) == NULL_TREE
2196 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2197 return false;
2199 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2201 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2203 /* Process function arguments. */
2204 rhs_type = NULL_TREE;
2205 vectype_in = NULL_TREE;
2206 nargs = gimple_call_num_args (stmt);
 2208 /* Bail out if the function has more than three arguments; we do not have
 2209 interesting builtin functions to vectorize with more than two arguments
 2210 except for fma. A call with no arguments is not vectorizable either. */
2211 if (nargs == 0 || nargs > 3)
2212 return false;
2214 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2215 if (gimple_call_internal_p (stmt)
2216 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2218 nargs = 0;
2219 rhs_type = unsigned_type_node;
2222 for (i = 0; i < nargs; i++)
2224 tree opvectype;
2226 op = gimple_call_arg (stmt, i);
2228 /* We can only handle calls with arguments of the same type. */
2229 if (rhs_type
2230 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2232 if (dump_enabled_p ())
2233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2234 "argument types differ.\n");
2235 return false;
2237 if (!rhs_type)
2238 rhs_type = TREE_TYPE (op);
2240 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 "use not simple.\n");
2245 return false;
2248 if (!vectype_in)
2249 vectype_in = opvectype;
2250 else if (opvectype
2251 && opvectype != vectype_in)
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "argument vector types differ.\n");
2256 return false;
2259 /* If all arguments are external or constant defs use a vector type with
2260 the same size as the output vector type. */
2261 if (!vectype_in)
2262 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2263 if (vec_stmt)
2264 gcc_assert (vectype_in);
2265 if (!vectype_in)
2267 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2270 "no vectype for scalar type ");
2271 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2272 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2275 return false;
2278 /* FORNOW */
2279 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2280 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2281 if (nunits_in == nunits_out / 2)
2282 modifier = NARROW;
2283 else if (nunits_out == nunits_in)
2284 modifier = NONE;
2285 else if (nunits_out == nunits_in / 2)
2286 modifier = WIDEN;
2287 else
2288 return false;
2290 /* We only handle functions that do not read or clobber memory. */
2291 if (gimple_vuse (stmt))
2293 if (dump_enabled_p ())
2294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2295 "function reads from or writes to memory.\n");
2296 return false;
2299 /* For now, we only vectorize functions if a target specific builtin
2300 is available. TODO -- in some cases, it might be profitable to
2301 insert the calls for pieces of the vector, in order to be able
2302 to vectorize other operations in the loop. */
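 /* Editorial note (not from the original source): the lookup below tries,
    in order, an internal function for the combined_fn (possibly together
    with a single integer narrowing step), then a target-provided builtin
    via the targetm.vectorize hooks, and finally the IFN_GOMP_SIMD_LANE
    special case, which is expanded as a { 0, 1, 2, ... vf - 1 } vector.  */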
2303 fndecl = NULL_TREE;
2304 internal_fn ifn = IFN_LAST;
2305 combined_fn cfn = gimple_call_combined_fn (stmt);
2306 tree callee = gimple_call_fndecl (stmt);
2308 /* First try using an internal function. */
2309 tree_code convert_code = ERROR_MARK;
2310 if (cfn != CFN_LAST
2311 && (modifier == NONE
2312 || (modifier == NARROW
2313 && simple_integer_narrowing (vectype_out, vectype_in,
2314 &convert_code))))
2315 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2316 vectype_in);
2318 /* If that fails, try asking for a target-specific built-in function. */
2319 if (ifn == IFN_LAST)
2321 if (cfn != CFN_LAST)
2322 fndecl = targetm.vectorize.builtin_vectorized_function
2323 (cfn, vectype_out, vectype_in);
2324 else
2325 fndecl = targetm.vectorize.builtin_md_vectorized_function
2326 (callee, vectype_out, vectype_in);
2329 if (ifn == IFN_LAST && !fndecl)
2331 if (cfn == CFN_GOMP_SIMD_LANE
2332 && !slp_node
2333 && loop_vinfo
2334 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2335 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2336 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2337 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2339 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2340 { 0, 1, 2, ... vf - 1 } vector. */
2341 gcc_assert (nargs == 0);
2343 else
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "function is not vectorizable.\n");
2348 return false;
2352 if (slp_node)
2353 ncopies = 1;
2354 else if (modifier == NARROW && ifn == IFN_LAST)
2355 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2356 else
2357 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2359 /* Sanity check: make sure that at least one copy of the vectorized stmt
2360 needs to be generated. */
2361 gcc_assert (ncopies >= 1);
2363 if (!vec_stmt) /* transformation not required. */
2365 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2368 "\n");
2369 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2370 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2371 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2372 vec_promote_demote, stmt_info, 0, vect_body);
2374 return true;
2377 /** Transform. **/
2379 if (dump_enabled_p ())
2380 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2382 /* Handle def. */
2383 scalar_dest = gimple_call_lhs (stmt);
2384 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2386 prev_stmt_info = NULL;
2387 if (modifier == NONE || ifn != IFN_LAST)
2389 tree prev_res = NULL_TREE;
2390 for (j = 0; j < ncopies; ++j)
2392 /* Build argument list for the vectorized call. */
2393 if (j == 0)
2394 vargs.create (nargs);
2395 else
2396 vargs.truncate (0);
2398 if (slp_node)
2400 auto_vec<vec<tree> > vec_defs (nargs);
2401 vec<tree> vec_oprnds0;
2403 for (i = 0; i < nargs; i++)
2404 vargs.quick_push (gimple_call_arg (stmt, i));
2405 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2406 vec_oprnds0 = vec_defs[0];
2408 /* Arguments are ready. Create the new vector stmt. */
2409 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2411 size_t k;
2412 for (k = 0; k < nargs; k++)
2414 vec<tree> vec_oprndsk = vec_defs[k];
2415 vargs[k] = vec_oprndsk[i];
2417 if (modifier == NARROW)
2419 tree half_res = make_ssa_name (vectype_in);
2420 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2421 gimple_call_set_lhs (new_stmt, half_res);
2422 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2423 if ((i & 1) == 0)
2425 prev_res = half_res;
2426 continue;
2428 new_temp = make_ssa_name (vec_dest);
2429 new_stmt = gimple_build_assign (new_temp, convert_code,
2430 prev_res, half_res);
2432 else
2434 if (ifn != IFN_LAST)
2435 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2436 else
2437 new_stmt = gimple_build_call_vec (fndecl, vargs);
2438 new_temp = make_ssa_name (vec_dest, new_stmt);
2439 gimple_call_set_lhs (new_stmt, new_temp);
2441 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2442 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2445 for (i = 0; i < nargs; i++)
2447 vec<tree> vec_oprndsi = vec_defs[i];
2448 vec_oprndsi.release ();
2450 continue;
2453 for (i = 0; i < nargs; i++)
2455 op = gimple_call_arg (stmt, i);
2456 if (j == 0)
2457 vec_oprnd0
2458 = vect_get_vec_def_for_operand (op, stmt);
2459 else
2461 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2462 vec_oprnd0
2463 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2466 vargs.quick_push (vec_oprnd0);
2469 if (gimple_call_internal_p (stmt)
2470 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2472 tree *v = XALLOCAVEC (tree, nunits_out);
2473 int k;
2474 for (k = 0; k < nunits_out; ++k)
2475 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2476 tree cst = build_vector (vectype_out, v);
2477 tree new_var
2478 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2479 gimple *init_stmt = gimple_build_assign (new_var, cst);
2480 vect_init_vector_1 (stmt, init_stmt, NULL);
2481 new_temp = make_ssa_name (vec_dest);
2482 new_stmt = gimple_build_assign (new_temp, new_var);
2484 else if (modifier == NARROW)
2486 tree half_res = make_ssa_name (vectype_in);
2487 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2488 gimple_call_set_lhs (new_stmt, half_res);
2489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2490 if ((j & 1) == 0)
2492 prev_res = half_res;
2493 continue;
2495 new_temp = make_ssa_name (vec_dest);
2496 new_stmt = gimple_build_assign (new_temp, convert_code,
2497 prev_res, half_res);
2499 else
2501 if (ifn != IFN_LAST)
2502 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2503 else
2504 new_stmt = gimple_build_call_vec (fndecl, vargs);
2505 new_temp = make_ssa_name (vec_dest, new_stmt);
2506 gimple_call_set_lhs (new_stmt, new_temp);
2508 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2510 if (j == (modifier == NARROW ? 1 : 0))
2511 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2512 else
2513 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2515 prev_stmt_info = vinfo_for_stmt (new_stmt);
2518 else if (modifier == NARROW)
2520 for (j = 0; j < ncopies; ++j)
2522 /* Build argument list for the vectorized call. */
2523 if (j == 0)
2524 vargs.create (nargs * 2);
2525 else
2526 vargs.truncate (0);
2528 if (slp_node)
2530 auto_vec<vec<tree> > vec_defs (nargs);
2531 vec<tree> vec_oprnds0;
2533 for (i = 0; i < nargs; i++)
2534 vargs.quick_push (gimple_call_arg (stmt, i));
2535 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2536 vec_oprnds0 = vec_defs[0];
2538 /* Arguments are ready. Create the new vector stmt. */
2539 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2541 size_t k;
2542 vargs.truncate (0);
2543 for (k = 0; k < nargs; k++)
2545 vec<tree> vec_oprndsk = vec_defs[k];
2546 vargs.quick_push (vec_oprndsk[i]);
2547 vargs.quick_push (vec_oprndsk[i + 1]);
2549 if (ifn != IFN_LAST)
2550 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2551 else
2552 new_stmt = gimple_build_call_vec (fndecl, vargs);
2553 new_temp = make_ssa_name (vec_dest, new_stmt);
2554 gimple_call_set_lhs (new_stmt, new_temp);
2555 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2556 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2559 for (i = 0; i < nargs; i++)
2561 vec<tree> vec_oprndsi = vec_defs[i];
2562 vec_oprndsi.release ();
2564 continue;
2567 for (i = 0; i < nargs; i++)
2569 op = gimple_call_arg (stmt, i);
2570 if (j == 0)
2572 vec_oprnd0
2573 = vect_get_vec_def_for_operand (op, stmt);
2574 vec_oprnd1
2575 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2577 else
2579 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2580 vec_oprnd0
2581 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2582 vec_oprnd1
2583 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2586 vargs.quick_push (vec_oprnd0);
2587 vargs.quick_push (vec_oprnd1);
2590 new_stmt = gimple_build_call_vec (fndecl, vargs);
2591 new_temp = make_ssa_name (vec_dest, new_stmt);
2592 gimple_call_set_lhs (new_stmt, new_temp);
2593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2595 if (j == 0)
2596 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2597 else
2598 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2600 prev_stmt_info = vinfo_for_stmt (new_stmt);
2603 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2605 else
2606 /* No current target implements this case. */
2607 return false;
2609 vargs.release ();
 2611 /* The call in STMT might prevent it from being removed in DCE.
 2612 We however cannot remove it here, due to the way the SSA name
 2613 it defines is mapped to the new definition. So just replace the
 2614 rhs of the statement with something harmless. */
2616 if (slp_node)
2617 return true;
2619 type = TREE_TYPE (scalar_dest);
2620 if (is_pattern_stmt_p (stmt_info))
2621 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2622 else
2623 lhs = gimple_call_lhs (stmt);
2625 if (gimple_call_internal_p (stmt)
2626 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
 2628 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
 2629 with vf - 1 rather than 0, i.e. the value of the last iteration of the
 2630 vectorized loop. */
2631 imm_use_iterator iter;
2632 use_operand_p use_p;
2633 gimple *use_stmt;
2634 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2636 basic_block use_bb = gimple_bb (use_stmt);
2637 if (use_bb
2638 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2640 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2641 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2642 ncopies * nunits_out - 1));
2643 update_stmt (use_stmt);
2648 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2649 set_vinfo_for_stmt (new_stmt, stmt_info);
2650 set_vinfo_for_stmt (stmt, NULL);
2651 STMT_VINFO_STMT (stmt_info) = new_stmt;
2652 gsi_replace (gsi, new_stmt, false);
2654 return true;
2658 struct simd_call_arg_info
2660 tree vectype;
2661 tree op;
2662 enum vect_def_type dt;
2663 HOST_WIDE_INT linear_step;
2664 unsigned int align;
2665 bool simd_lane_linear;
2668 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2669 is linear within simd lane (but not within whole loop), note it in
2670 *ARGINFO. */
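 /* Editorial example (a hypothetical GIMPLE sequence, not from the original
    source) of the pattern recognized below:

      _1 = GOMP_SIMD_LANE (simduid.0_5(D));
      _2 = (long unsigned int) _1;
      _3 = _2 * 4;
      op_6 = &a + _3;

    OP advances by 4 within each simd lane, so ARGINFO records base &a,
    linear_step 4 and simd_lane_linear = true.  */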
2672 static void
2673 vect_simd_lane_linear (tree op, struct loop *loop,
2674 struct simd_call_arg_info *arginfo)
2676 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2678 if (!is_gimple_assign (def_stmt)
2679 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2680 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2681 return;
2683 tree base = gimple_assign_rhs1 (def_stmt);
2684 HOST_WIDE_INT linear_step = 0;
2685 tree v = gimple_assign_rhs2 (def_stmt);
2686 while (TREE_CODE (v) == SSA_NAME)
2688 tree t;
2689 def_stmt = SSA_NAME_DEF_STMT (v);
2690 if (is_gimple_assign (def_stmt))
2691 switch (gimple_assign_rhs_code (def_stmt))
2693 case PLUS_EXPR:
2694 t = gimple_assign_rhs2 (def_stmt);
2695 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2696 return;
2697 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2698 v = gimple_assign_rhs1 (def_stmt);
2699 continue;
2700 case MULT_EXPR:
2701 t = gimple_assign_rhs2 (def_stmt);
2702 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2703 return;
2704 linear_step = tree_to_shwi (t);
2705 v = gimple_assign_rhs1 (def_stmt);
2706 continue;
2707 CASE_CONVERT:
2708 t = gimple_assign_rhs1 (def_stmt);
2709 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2710 || (TYPE_PRECISION (TREE_TYPE (v))
2711 < TYPE_PRECISION (TREE_TYPE (t))))
2712 return;
2713 if (!linear_step)
2714 linear_step = 1;
2715 v = t;
2716 continue;
2717 default:
2718 return;
2720 else if (is_gimple_call (def_stmt)
2721 && gimple_call_internal_p (def_stmt)
2722 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2723 && loop->simduid
2724 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2725 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2726 == loop->simduid))
2728 if (!linear_step)
2729 linear_step = 1;
2730 arginfo->linear_step = linear_step;
2731 arginfo->op = base;
2732 arginfo->simd_lane_linear = true;
2733 return;
2738 /* Function vectorizable_simd_clone_call.
2740 Check if STMT performs a function call that can be vectorized
2741 by calling a simd clone of the function.
2742 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2743 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2744 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
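 /* Editorial note (not from the original source): the typical case is a call
    to a function declared with "#pragma omp declare simd"; the scalar call
    is replaced by a call to the best-matching simd clone, whose vector
    arguments are assembled below from the vectorized operands.  */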
2746 static bool
2747 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2748 gimple **vec_stmt, slp_tree slp_node)
2750 tree vec_dest;
2751 tree scalar_dest;
2752 tree op, type;
2753 tree vec_oprnd0 = NULL_TREE;
2754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2755 tree vectype;
2756 unsigned int nunits;
2757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2759 vec_info *vinfo = stmt_info->vinfo;
2760 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2761 tree fndecl, new_temp;
2762 gimple *def_stmt;
2763 gimple *new_stmt = NULL;
2764 int ncopies, j;
2765 auto_vec<simd_call_arg_info> arginfo;
2766 vec<tree> vargs = vNULL;
2767 size_t i, nargs;
2768 tree lhs, rtype, ratype;
2769 vec<constructor_elt, va_gc> *ret_ctor_elts;
2771 /* Is STMT a vectorizable call? */
2772 if (!is_gimple_call (stmt))
2773 return false;
2775 fndecl = gimple_call_fndecl (stmt);
2776 if (fndecl == NULL_TREE)
2777 return false;
2779 struct cgraph_node *node = cgraph_node::get (fndecl);
2780 if (node == NULL || node->simd_clones == NULL)
2781 return false;
2783 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2784 return false;
2786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2787 && ! vec_stmt)
2788 return false;
2790 if (gimple_call_lhs (stmt)
2791 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2792 return false;
2794 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2796 vectype = STMT_VINFO_VECTYPE (stmt_info);
2798 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2799 return false;
2801 /* FORNOW */
2802 if (slp_node)
2803 return false;
2805 /* Process function arguments. */
2806 nargs = gimple_call_num_args (stmt);
2808 /* Bail out if the function has zero arguments. */
2809 if (nargs == 0)
2810 return false;
2812 arginfo.reserve (nargs, true);
2814 for (i = 0; i < nargs; i++)
2816 simd_call_arg_info thisarginfo;
2817 affine_iv iv;
2819 thisarginfo.linear_step = 0;
2820 thisarginfo.align = 0;
2821 thisarginfo.op = NULL_TREE;
2822 thisarginfo.simd_lane_linear = false;
2824 op = gimple_call_arg (stmt, i);
2825 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2826 &thisarginfo.vectype)
2827 || thisarginfo.dt == vect_uninitialized_def)
2829 if (dump_enabled_p ())
2830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2831 "use not simple.\n");
2832 return false;
2835 if (thisarginfo.dt == vect_constant_def
2836 || thisarginfo.dt == vect_external_def)
2837 gcc_assert (thisarginfo.vectype == NULL_TREE);
2838 else
2839 gcc_assert (thisarginfo.vectype != NULL_TREE);
2841 /* For linear arguments, the analyze phase should have saved
2842 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2843 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2844 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2846 gcc_assert (vec_stmt);
2847 thisarginfo.linear_step
2848 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2849 thisarginfo.op
2850 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2851 thisarginfo.simd_lane_linear
2852 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2853 == boolean_true_node);
 2854 /* If the loop has been peeled for alignment, we need to adjust it. */
2855 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2856 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2857 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2859 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2860 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2861 tree opt = TREE_TYPE (thisarginfo.op);
2862 bias = fold_convert (TREE_TYPE (step), bias);
2863 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2864 thisarginfo.op
2865 = fold_build2 (POINTER_TYPE_P (opt)
2866 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2867 thisarginfo.op, bias);
2870 else if (!vec_stmt
2871 && thisarginfo.dt != vect_constant_def
2872 && thisarginfo.dt != vect_external_def
2873 && loop_vinfo
2874 && TREE_CODE (op) == SSA_NAME
2875 && simple_iv (loop, loop_containing_stmt (stmt), op,
2876 &iv, false)
2877 && tree_fits_shwi_p (iv.step))
2879 thisarginfo.linear_step = tree_to_shwi (iv.step);
2880 thisarginfo.op = iv.base;
2882 else if ((thisarginfo.dt == vect_constant_def
2883 || thisarginfo.dt == vect_external_def)
2884 && POINTER_TYPE_P (TREE_TYPE (op)))
2885 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2886 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2887 linear too. */
2888 if (POINTER_TYPE_P (TREE_TYPE (op))
2889 && !thisarginfo.linear_step
2890 && !vec_stmt
2891 && thisarginfo.dt != vect_constant_def
2892 && thisarginfo.dt != vect_external_def
2893 && loop_vinfo
2894 && !slp_node
2895 && TREE_CODE (op) == SSA_NAME)
2896 vect_simd_lane_linear (op, loop, &thisarginfo);
2898 arginfo.quick_push (thisarginfo);
2901 unsigned int badness = 0;
2902 struct cgraph_node *bestn = NULL;
2903 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2904 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2905 else
2906 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2907 n = n->simdclone->next_clone)
2909 unsigned int this_badness = 0;
2910 if (n->simdclone->simdlen
2911 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2912 || n->simdclone->nargs != nargs)
2913 continue;
2914 if (n->simdclone->simdlen
2915 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2916 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2917 - exact_log2 (n->simdclone->simdlen)) * 1024;
2918 if (n->simdclone->inbranch)
2919 this_badness += 2048;
2920 int target_badness = targetm.simd_clone.usable (n);
2921 if (target_badness < 0)
2922 continue;
2923 this_badness += target_badness * 512;
2924 /* FORNOW: Have to add code to add the mask argument. */
2925 if (n->simdclone->inbranch)
2926 continue;
2927 for (i = 0; i < nargs; i++)
2929 switch (n->simdclone->args[i].arg_type)
2931 case SIMD_CLONE_ARG_TYPE_VECTOR:
2932 if (!useless_type_conversion_p
2933 (n->simdclone->args[i].orig_type,
2934 TREE_TYPE (gimple_call_arg (stmt, i))))
2935 i = -1;
2936 else if (arginfo[i].dt == vect_constant_def
2937 || arginfo[i].dt == vect_external_def
2938 || arginfo[i].linear_step)
2939 this_badness += 64;
2940 break;
2941 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2942 if (arginfo[i].dt != vect_constant_def
2943 && arginfo[i].dt != vect_external_def)
2944 i = -1;
2945 break;
2946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2948 if (arginfo[i].dt == vect_constant_def
2949 || arginfo[i].dt == vect_external_def
2950 || (arginfo[i].linear_step
2951 != n->simdclone->args[i].linear_step))
2952 i = -1;
2953 break;
2954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2960 /* FORNOW */
2961 i = -1;
2962 break;
2963 case SIMD_CLONE_ARG_TYPE_MASK:
2964 gcc_unreachable ();
2966 if (i == (size_t) -1)
2967 break;
2968 if (n->simdclone->args[i].alignment > arginfo[i].align)
2970 i = -1;
2971 break;
2973 if (arginfo[i].align)
2974 this_badness += (exact_log2 (arginfo[i].align)
2975 - exact_log2 (n->simdclone->args[i].alignment));
2977 if (i == (size_t) -1)
2978 continue;
2979 if (bestn == NULL || this_badness < badness)
2981 bestn = n;
2982 badness = this_badness;
2986 if (bestn == NULL)
2987 return false;
2989 for (i = 0; i < nargs; i++)
2990 if ((arginfo[i].dt == vect_constant_def
2991 || arginfo[i].dt == vect_external_def)
2992 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2994 arginfo[i].vectype
2995 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2996 i)));
2997 if (arginfo[i].vectype == NULL
2998 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2999 > bestn->simdclone->simdlen))
3000 return false;
3003 fndecl = bestn->decl;
3004 nunits = bestn->simdclone->simdlen;
3005 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
 3007 /* If the function isn't const, only allow it in simd loops where the
 3008 user has asserted that at least nunits consecutive iterations can be
 3009 performed using SIMD instructions. */
3010 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3011 && gimple_vuse (stmt))
3012 return false;
3014 /* Sanity check: make sure that at least one copy of the vectorized stmt
3015 needs to be generated. */
3016 gcc_assert (ncopies >= 1);
3018 if (!vec_stmt) /* transformation not required. */
3020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3021 for (i = 0; i < nargs; i++)
3022 if ((bestn->simdclone->args[i].arg_type
3023 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3024 || (bestn->simdclone->args[i].arg_type
3025 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3028 + 1);
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3030 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3031 ? size_type_node : TREE_TYPE (arginfo[i].op);
3032 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3033 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3034 tree sll = arginfo[i].simd_lane_linear
3035 ? boolean_true_node : boolean_false_node;
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3038 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3039 if (dump_enabled_p ())
3040 dump_printf_loc (MSG_NOTE, vect_location,
3041 "=== vectorizable_simd_clone_call ===\n");
3042 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3043 return true;
3046 /** Transform. **/
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3051 /* Handle def. */
3052 scalar_dest = gimple_call_lhs (stmt);
3053 vec_dest = NULL_TREE;
3054 rtype = NULL_TREE;
3055 ratype = NULL_TREE;
3056 if (scalar_dest)
3058 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3059 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3060 if (TREE_CODE (rtype) == ARRAY_TYPE)
3062 ratype = rtype;
3063 rtype = TREE_TYPE (ratype);
3067 prev_stmt_info = NULL;
3068 for (j = 0; j < ncopies; ++j)
3070 /* Build argument list for the vectorized call. */
3071 if (j == 0)
3072 vargs.create (nargs);
3073 else
3074 vargs.truncate (0);
3076 for (i = 0; i < nargs; i++)
3078 unsigned int k, l, m, o;
3079 tree atype;
3080 op = gimple_call_arg (stmt, i);
3081 switch (bestn->simdclone->args[i].arg_type)
3083 case SIMD_CLONE_ARG_TYPE_VECTOR:
3084 atype = bestn->simdclone->args[i].vector_type;
3085 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3086 for (m = j * o; m < (j + 1) * o; m++)
3088 if (TYPE_VECTOR_SUBPARTS (atype)
3089 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3091 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3092 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3093 / TYPE_VECTOR_SUBPARTS (atype));
3094 gcc_assert ((k & (k - 1)) == 0);
3095 if (m == 0)
3096 vec_oprnd0
3097 = vect_get_vec_def_for_operand (op, stmt);
3098 else
3100 vec_oprnd0 = arginfo[i].op;
3101 if ((m & (k - 1)) == 0)
3102 vec_oprnd0
3103 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3104 vec_oprnd0);
3106 arginfo[i].op = vec_oprnd0;
3107 vec_oprnd0
3108 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3109 size_int (prec),
3110 bitsize_int ((m & (k - 1)) * prec));
3111 new_stmt
3112 = gimple_build_assign (make_ssa_name (atype),
3113 vec_oprnd0);
3114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3115 vargs.safe_push (gimple_assign_lhs (new_stmt));
3117 else
3119 k = (TYPE_VECTOR_SUBPARTS (atype)
3120 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3121 gcc_assert ((k & (k - 1)) == 0);
3122 vec<constructor_elt, va_gc> *ctor_elts;
3123 if (k != 1)
3124 vec_alloc (ctor_elts, k);
3125 else
3126 ctor_elts = NULL;
3127 for (l = 0; l < k; l++)
3129 if (m == 0 && l == 0)
3130 vec_oprnd0
3131 = vect_get_vec_def_for_operand (op, stmt);
3132 else
3133 vec_oprnd0
3134 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3135 arginfo[i].op);
3136 arginfo[i].op = vec_oprnd0;
3137 if (k == 1)
3138 break;
3139 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3140 vec_oprnd0);
3142 if (k == 1)
3143 vargs.safe_push (vec_oprnd0);
3144 else
3146 vec_oprnd0 = build_constructor (atype, ctor_elts);
3147 new_stmt
3148 = gimple_build_assign (make_ssa_name (atype),
3149 vec_oprnd0);
3150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3151 vargs.safe_push (gimple_assign_lhs (new_stmt));
3155 break;
3156 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3157 vargs.safe_push (op);
3158 break;
3159 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3160 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3161 if (j == 0)
3163 gimple_seq stmts;
3164 arginfo[i].op
3165 = force_gimple_operand (arginfo[i].op, &stmts, true,
3166 NULL_TREE);
3167 if (stmts != NULL)
3169 basic_block new_bb;
3170 edge pe = loop_preheader_edge (loop);
3171 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3172 gcc_assert (!new_bb);
3174 if (arginfo[i].simd_lane_linear)
3176 vargs.safe_push (arginfo[i].op);
3177 break;
3179 tree phi_res = copy_ssa_name (op);
3180 gphi *new_phi = create_phi_node (phi_res, loop->header);
3181 set_vinfo_for_stmt (new_phi,
3182 new_stmt_vec_info (new_phi, loop_vinfo));
3183 add_phi_arg (new_phi, arginfo[i].op,
3184 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3185 enum tree_code code
3186 = POINTER_TYPE_P (TREE_TYPE (op))
3187 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3188 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3189 ? sizetype : TREE_TYPE (op);
3190 widest_int cst
3191 = wi::mul (bestn->simdclone->args[i].linear_step,
3192 ncopies * nunits);
3193 tree tcst = wide_int_to_tree (type, cst);
3194 tree phi_arg = copy_ssa_name (op);
3195 new_stmt
3196 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3197 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3198 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3199 set_vinfo_for_stmt (new_stmt,
3200 new_stmt_vec_info (new_stmt, loop_vinfo));
3201 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3202 UNKNOWN_LOCATION);
3203 arginfo[i].op = phi_res;
3204 vargs.safe_push (phi_res);
3206 else
3208 enum tree_code code
3209 = POINTER_TYPE_P (TREE_TYPE (op))
3210 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3211 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3212 ? sizetype : TREE_TYPE (op);
3213 widest_int cst
3214 = wi::mul (bestn->simdclone->args[i].linear_step,
3215 j * nunits);
3216 tree tcst = wide_int_to_tree (type, cst);
3217 new_temp = make_ssa_name (TREE_TYPE (op));
3218 new_stmt = gimple_build_assign (new_temp, code,
3219 arginfo[i].op, tcst);
3220 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3221 vargs.safe_push (new_temp);
3223 break;
3224 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3225 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3226 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3227 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3228 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3229 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3230 default:
3231 gcc_unreachable ();
3235 new_stmt = gimple_build_call_vec (fndecl, vargs);
3236 if (vec_dest)
3238 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3239 if (ratype)
3240 new_temp = create_tmp_var (ratype);
3241 else if (TYPE_VECTOR_SUBPARTS (vectype)
3242 == TYPE_VECTOR_SUBPARTS (rtype))
3243 new_temp = make_ssa_name (vec_dest, new_stmt);
3244 else
3245 new_temp = make_ssa_name (rtype, new_stmt);
3246 gimple_call_set_lhs (new_stmt, new_temp);
3248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3250 if (vec_dest)
3252 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3254 unsigned int k, l;
3255 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3256 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3257 gcc_assert ((k & (k - 1)) == 0);
3258 for (l = 0; l < k; l++)
3260 tree t;
3261 if (ratype)
3263 t = build_fold_addr_expr (new_temp);
3264 t = build2 (MEM_REF, vectype, t,
3265 build_int_cst (TREE_TYPE (t),
3266 l * prec / BITS_PER_UNIT));
3268 else
3269 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3270 size_int (prec), bitsize_int (l * prec));
3271 new_stmt
3272 = gimple_build_assign (make_ssa_name (vectype), t);
3273 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3274 if (j == 0 && l == 0)
3275 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3276 else
3277 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3279 prev_stmt_info = vinfo_for_stmt (new_stmt);
3282 if (ratype)
3284 tree clobber = build_constructor (ratype, NULL);
3285 TREE_THIS_VOLATILE (clobber) = 1;
3286 new_stmt = gimple_build_assign (new_temp, clobber);
3287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3289 continue;
3291 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3293 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3294 / TYPE_VECTOR_SUBPARTS (rtype));
3295 gcc_assert ((k & (k - 1)) == 0);
3296 if ((j & (k - 1)) == 0)
3297 vec_alloc (ret_ctor_elts, k);
3298 if (ratype)
3300 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3301 for (m = 0; m < o; m++)
3303 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3304 size_int (m), NULL_TREE, NULL_TREE);
3305 new_stmt
3306 = gimple_build_assign (make_ssa_name (rtype), tem);
3307 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3308 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3309 gimple_assign_lhs (new_stmt));
3311 tree clobber = build_constructor (ratype, NULL);
3312 TREE_THIS_VOLATILE (clobber) = 1;
3313 new_stmt = gimple_build_assign (new_temp, clobber);
3314 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3316 else
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3318 if ((j & (k - 1)) != k - 1)
3319 continue;
3320 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3321 new_stmt
3322 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3325 if ((unsigned) j == k - 1)
3326 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3327 else
3328 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3330 prev_stmt_info = vinfo_for_stmt (new_stmt);
3331 continue;
3333 else if (ratype)
3335 tree t = build_fold_addr_expr (new_temp);
3336 t = build2 (MEM_REF, vectype, t,
3337 build_int_cst (TREE_TYPE (t), 0));
3338 new_stmt
3339 = gimple_build_assign (make_ssa_name (vec_dest), t);
3340 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3341 tree clobber = build_constructor (ratype, NULL);
3342 TREE_THIS_VOLATILE (clobber) = 1;
3343 vect_finish_stmt_generation (stmt,
3344 gimple_build_assign (new_temp,
3345 clobber), gsi);
3349 if (j == 0)
3350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3351 else
3352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3354 prev_stmt_info = vinfo_for_stmt (new_stmt);
3357 vargs.release ();
3359 /* The call in STMT might prevent it from being removed in dce.
3360 We however cannot remove it here, due to the way the ssa name
3361 it defines is mapped to the new definition. So just replace
3362 rhs of the statement with something harmless. */
3364 if (slp_node)
3365 return true;
3367 if (scalar_dest)
3369 type = TREE_TYPE (scalar_dest);
3370 if (is_pattern_stmt_p (stmt_info))
3371 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3372 else
3373 lhs = gimple_call_lhs (stmt);
3374 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3376 else
3377 new_stmt = gimple_build_nop ();
3378 set_vinfo_for_stmt (new_stmt, stmt_info);
3379 set_vinfo_for_stmt (stmt, NULL);
3380 STMT_VINFO_STMT (stmt_info) = new_stmt;
3381 gsi_replace (gsi, new_stmt, true);
3382 unlink_stmt_vdef (stmt);
3384 return true;
3388 /* Function vect_gen_widened_results_half
 3390 Create a vector stmt whose code, operand type, and result
 3391 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
 3392 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3393 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3394 needs to be created (DECL is a function-decl of a target-builtin).
3395 STMT is the original scalar stmt that we are vectorizing. */
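 /* Editorial example (illustrative only, names are hypothetical): with
    CODE == VEC_WIDEN_MULT_LO_EXPR this emits

      vect_tmp_1 = VEC_WIDEN_MULT_LO_EXPR <vec_oprnd0, vec_oprnd1>;

    while with CODE == CALL_EXPR it emits a call to the target builtin DECL:

      vect_tmp_1 = DECL (vec_oprnd0, vec_oprnd1);  */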
3397 static gimple *
3398 vect_gen_widened_results_half (enum tree_code code,
3399 tree decl,
3400 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3401 tree vec_dest, gimple_stmt_iterator *gsi,
3402 gimple *stmt)
3404 gimple *new_stmt;
3405 tree new_temp;
3407 /* Generate half of the widened result: */
3408 if (code == CALL_EXPR)
3410 /* Target specific support */
3411 if (op_type == binary_op)
3412 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3413 else
3414 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3415 new_temp = make_ssa_name (vec_dest, new_stmt);
3416 gimple_call_set_lhs (new_stmt, new_temp);
3418 else
3420 /* Generic support */
3421 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3422 if (op_type != binary_op)
3423 vec_oprnd1 = NULL;
3424 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3425 new_temp = make_ssa_name (vec_dest, new_stmt);
3426 gimple_assign_set_lhs (new_stmt, new_temp);
3428 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3430 return new_stmt;
3434 /* Get vectorized definitions for loop-based vectorization. For the first
 3435 operand we call vect_get_vec_def_for_operand() (with OPRND containing
 3436 the scalar operand), and for the rest we get a copy with
3437 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3438 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3439 The vectors are collected into VEC_OPRNDS. */
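 /* Editorial note (not from the original source): each invocation pushes two
    vector defs and recurses while MULTI_STEP_CVT > 0, so in total
    2 * (MULTI_STEP_CVT + 1) vector defs are collected for OPRND.  */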
3441 static void
3442 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3443 vec<tree> *vec_oprnds, int multi_step_cvt)
3445 tree vec_oprnd;
3447 /* Get first vector operand. */
3448 /* All the vector operands except the very first one (that is scalar oprnd)
3449 are stmt copies. */
3450 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3451 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3452 else
3453 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3455 vec_oprnds->quick_push (vec_oprnd);
3457 /* Get second vector operand. */
3458 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3459 vec_oprnds->quick_push (vec_oprnd);
3461 *oprnd = vec_oprnd;
3463 /* For conversion in multiple steps, continue to get operands
3464 recursively. */
3465 if (multi_step_cvt)
3466 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3470 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3471 For multi-step conversions store the resulting vectors and call the function
3472 recursively. */
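 /* Editorial example (illustrative only): with four vector operands and
    MULTI_STEP_CVT == 1, the first pass below emits two demotion stmts
    producing two intermediate vectors, which the recursive call then packs
    into the final vector with VEC_PACK_TRUNC_EXPR.  */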
3474 static void
3475 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3476 int multi_step_cvt, gimple *stmt,
3477 vec<tree> vec_dsts,
3478 gimple_stmt_iterator *gsi,
3479 slp_tree slp_node, enum tree_code code,
3480 stmt_vec_info *prev_stmt_info)
3482 unsigned int i;
3483 tree vop0, vop1, new_tmp, vec_dest;
3484 gimple *new_stmt;
3485 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3487 vec_dest = vec_dsts.pop ();
3489 for (i = 0; i < vec_oprnds->length (); i += 2)
3491 /* Create demotion operation. */
3492 vop0 = (*vec_oprnds)[i];
3493 vop1 = (*vec_oprnds)[i + 1];
3494 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3495 new_tmp = make_ssa_name (vec_dest, new_stmt);
3496 gimple_assign_set_lhs (new_stmt, new_tmp);
3497 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3499 if (multi_step_cvt)
3500 /* Store the resulting vector for next recursive call. */
3501 (*vec_oprnds)[i/2] = new_tmp;
3502 else
3504 /* This is the last step of the conversion sequence. Store the
3505 vectors in SLP_NODE or in vector info of the scalar statement
3506 (or in STMT_VINFO_RELATED_STMT chain). */
3507 if (slp_node)
3508 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3509 else
3511 if (!*prev_stmt_info)
3512 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3513 else
3514 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3516 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3521 /* For multi-step demotion operations we first generate demotion operations
3522 from the source type to the intermediate types, and then combine the
3523 results (stored in VEC_OPRNDS) in demotion operation to the destination
3524 type. */
3525 if (multi_step_cvt)
3527 /* At each level of recursion we have half of the operands we had at the
3528 previous level. */
3529 vec_oprnds->truncate ((i+1)/2);
3530 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3531 stmt, vec_dsts, gsi, slp_node,
3532 VEC_PACK_TRUNC_EXPR,
3533 prev_stmt_info);
3536 vec_dsts.quick_push (vec_dest);
3540 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3541 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3542 the resulting vectors and call the function recursively. */
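 /* Editorial example (illustrative only): for a widening multiplication each
    input vector in VEC_OPRNDS0 yields two result vectors, one from CODE1
    (e.g. VEC_WIDEN_MULT_LO_EXPR) and one from CODE2 (e.g.
    VEC_WIDEN_MULT_HI_EXPR), so VEC_OPRNDS0 is replaced by a list of vectors
    twice as long.  */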
3544 static void
3545 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3546 vec<tree> *vec_oprnds1,
3547 gimple *stmt, tree vec_dest,
3548 gimple_stmt_iterator *gsi,
3549 enum tree_code code1,
3550 enum tree_code code2, tree decl1,
3551 tree decl2, int op_type)
3553 int i;
3554 tree vop0, vop1, new_tmp1, new_tmp2;
3555 gimple *new_stmt1, *new_stmt2;
3556 vec<tree> vec_tmp = vNULL;
3558 vec_tmp.create (vec_oprnds0->length () * 2);
3559 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3561 if (op_type == binary_op)
3562 vop1 = (*vec_oprnds1)[i];
3563 else
3564 vop1 = NULL_TREE;
3566 /* Generate the two halves of promotion operation. */
3567 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3568 op_type, vec_dest, gsi, stmt);
3569 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3570 op_type, vec_dest, gsi, stmt);
3571 if (is_gimple_call (new_stmt1))
3573 new_tmp1 = gimple_call_lhs (new_stmt1);
3574 new_tmp2 = gimple_call_lhs (new_stmt2);
3576 else
3578 new_tmp1 = gimple_assign_lhs (new_stmt1);
3579 new_tmp2 = gimple_assign_lhs (new_stmt2);
3582 /* Store the results for the next step. */
3583 vec_tmp.quick_push (new_tmp1);
3584 vec_tmp.quick_push (new_tmp2);
3587 vec_oprnds0->release ();
3588 *vec_oprnds0 = vec_tmp;
3592 /* Check if STMT performs a conversion operation, that can be vectorized.
3593 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3594 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3595 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
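 /* Editorial note (not from the original source): the MODIFIER computed
    below distinguishes, e.g., int -> float on same-width vectors (NONE),
    short -> int where each input vector produces two result vectors (WIDEN),
    and int -> short where two input vectors are packed into one (NARROW).  */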
3597 static bool
3598 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3599 gimple **vec_stmt, slp_tree slp_node)
3601 tree vec_dest;
3602 tree scalar_dest;
3603 tree op0, op1 = NULL_TREE;
3604 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3606 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3607 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3608 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3609 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3610 tree new_temp;
3611 gimple *def_stmt;
3612 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3613 gimple *new_stmt = NULL;
3614 stmt_vec_info prev_stmt_info;
3615 int nunits_in;
3616 int nunits_out;
3617 tree vectype_out, vectype_in;
3618 int ncopies, i, j;
3619 tree lhs_type, rhs_type;
3620 enum { NARROW, NONE, WIDEN } modifier;
3621 vec<tree> vec_oprnds0 = vNULL;
3622 vec<tree> vec_oprnds1 = vNULL;
3623 tree vop0;
3624 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3625 vec_info *vinfo = stmt_info->vinfo;
3626 int multi_step_cvt = 0;
3627 vec<tree> vec_dsts = vNULL;
3628 vec<tree> interm_types = vNULL;
3629 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3630 int op_type;
3631 machine_mode rhs_mode;
3632 unsigned short fltsz;
3634 /* Is STMT a vectorizable conversion? */
3636 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3637 return false;
3639 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3640 && ! vec_stmt)
3641 return false;
3643 if (!is_gimple_assign (stmt))
3644 return false;
3646 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3647 return false;
3649 code = gimple_assign_rhs_code (stmt);
3650 if (!CONVERT_EXPR_CODE_P (code)
3651 && code != FIX_TRUNC_EXPR
3652 && code != FLOAT_EXPR
3653 && code != WIDEN_MULT_EXPR
3654 && code != WIDEN_LSHIFT_EXPR)
3655 return false;
3657 op_type = TREE_CODE_LENGTH (code);
3659 /* Check types of lhs and rhs. */
3660 scalar_dest = gimple_assign_lhs (stmt);
3661 lhs_type = TREE_TYPE (scalar_dest);
3662 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3664 op0 = gimple_assign_rhs1 (stmt);
3665 rhs_type = TREE_TYPE (op0);
3667 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3668 && !((INTEGRAL_TYPE_P (lhs_type)
3669 && INTEGRAL_TYPE_P (rhs_type))
3670 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3671 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3672 return false;
3674 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3675 && ((INTEGRAL_TYPE_P (lhs_type)
3676 && (TYPE_PRECISION (lhs_type)
3677 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3678 || (INTEGRAL_TYPE_P (rhs_type)
3679 && (TYPE_PRECISION (rhs_type)
3680 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3682 if (dump_enabled_p ())
3683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3684 "type conversion to/from bit-precision unsupported."
3685 "\n");
3686 return false;
3689 /* Check the operands of the operation. */
3690 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3694 "use not simple.\n");
3695 return false;
3697 if (op_type == binary_op)
3699 bool ok;
3701 op1 = gimple_assign_rhs2 (stmt);
3702 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3703 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3704 OP1. */
3705 if (CONSTANT_CLASS_P (op0))
3706 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3707 else
3708 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3710 if (!ok)
3712 if (dump_enabled_p ())
3713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3714 "use not simple.\n");
3715 return false;
3719 /* If op0 is an external or constant def, use a vector type of
3720 the same size as the output vector type. */
3721 if (!vectype_in)
3722 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3723 if (vec_stmt)
3724 gcc_assert (vectype_in);
3725 if (!vectype_in)
3727 if (dump_enabled_p ())
3729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3730 "no vectype for scalar type ");
3731 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3732 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3735 return false;
3738 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3739 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3741 if (dump_enabled_p ())
3743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3744 "can't convert between boolean and non "
3745 "boolean vectors");
3746 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3747 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3750 return false;
3753 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3754 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3755 if (nunits_in < nunits_out)
3756 modifier = NARROW;
3757 else if (nunits_out == nunits_in)
3758 modifier = NONE;
3759 else
3760 modifier = WIDEN;
3762 /* Multiple types in SLP are handled by creating the appropriate number of
3763 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3764 case of SLP. */
3765 if (slp_node)
3766 ncopies = 1;
3767 else if (modifier == NARROW)
3768 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3769 else
3770 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
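   /* Added illustrative note (the vector modes are target-dependent
      assumptions, not part of the original source): converting int to long
      on a target with V4SI and V2DI vectors gives nunits_in = 4 and
      nunits_out = 2, hence WIDEN; the reverse conversion gives
      nunits_in = 2 and nunits_out = 4, hence NARROW.  With a vectorization
      factor of 4 and no SLP, both cases end up with ncopies = 4 / 4 = 1,
      but a WIDEN copy produces two output vectors while a NARROW copy
      consumes two input vectors.  */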
3772 /* Sanity check: make sure that at least one copy of the vectorized stmt
3773 needs to be generated. */
3774 gcc_assert (ncopies >= 1);
3776 /* Supportable by target? */
3777 switch (modifier)
3779 case NONE:
3780 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3781 return false;
3782 if (supportable_convert_operation (code, vectype_out, vectype_in,
3783 &decl1, &code1))
3784 break;
3785 /* FALLTHRU */
3786 unsupported:
3787 if (dump_enabled_p ())
3788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3789 "conversion not supported by target.\n");
3790 return false;
3792 case WIDEN:
3793 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3794 &code1, &code2, &multi_step_cvt,
3795 &interm_types))
3797 /* A binary widening operation can only be supported directly by the
3798 architecture. */
3799 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3800 break;
3803 if (code != FLOAT_EXPR
3804 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3805 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3806 goto unsupported;
3808 rhs_mode = TYPE_MODE (rhs_type);
3809 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3810 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3811 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3812 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3814 cvt_type
3815 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3816 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3817 if (cvt_type == NULL_TREE)
3818 goto unsupported;
3820 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3822 if (!supportable_convert_operation (code, vectype_out,
3823 cvt_type, &decl1, &codecvt1))
3824 goto unsupported;
3826 else if (!supportable_widening_operation (code, stmt, vectype_out,
3827 cvt_type, &codecvt1,
3828 &codecvt2, &multi_step_cvt,
3829 &interm_types))
3830 continue;
3831 else
3832 gcc_assert (multi_step_cvt == 0);
3834 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3835 vectype_in, &code1, &code2,
3836 &multi_step_cvt, &interm_types))
3837 break;
3840 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3841 goto unsupported;
3843 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3844 codecvt2 = ERROR_MARK;
3845 else
3847 multi_step_cvt++;
3848 interm_types.safe_push (cvt_type);
3849 cvt_type = NULL_TREE;
3851 break;
3853 case NARROW:
3854 gcc_assert (op_type == unary_op);
3855 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3856 &code1, &multi_step_cvt,
3857 &interm_types))
3858 break;
3860 if (code != FIX_TRUNC_EXPR
3861 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3862 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3863 goto unsupported;
3865 rhs_mode = TYPE_MODE (rhs_type);
3866 cvt_type
3867 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3868 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3869 if (cvt_type == NULL_TREE)
3870 goto unsupported;
3871 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3872 &decl1, &codecvt1))
3873 goto unsupported;
3874 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3875 &code1, &multi_step_cvt,
3876 &interm_types))
3877 break;
3878 goto unsupported;
3880 default:
3881 gcc_unreachable ();
3884 if (!vec_stmt) /* transformation not required. */
3886 if (dump_enabled_p ())
3887 dump_printf_loc (MSG_NOTE, vect_location,
3888 "=== vectorizable_conversion ===\n");
3889 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3891 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3892 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3894 else if (modifier == NARROW)
3896 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3897 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3899 else
3901 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3902 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3904 interm_types.release ();
3905 return true;
3908 /** Transform. **/
3909 if (dump_enabled_p ())
3910 dump_printf_loc (MSG_NOTE, vect_location,
3911 "transform conversion. ncopies = %d.\n", ncopies);
3913 if (op_type == binary_op)
3915 if (CONSTANT_CLASS_P (op0))
3916 op0 = fold_convert (TREE_TYPE (op1), op0);
3917 else if (CONSTANT_CLASS_P (op1))
3918 op1 = fold_convert (TREE_TYPE (op0), op1);
3921 /* In case of multi-step conversion, we first generate conversion operations
3922 to the intermediate types, and then from those types to the final one.
3923 We create vector destinations for the intermediate type (TYPES) received
3924 from supportable_*_operation, and store them in the correct order
3925 for future use in vect_create_vectorized_*_stmts (). */
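   /* Added illustrative note (assumes a target without a direct
      short -> double conversion; not part of the original source): such a
      FLOAT_EXPR may be vectorized in two steps, short -> int (a widening
      NOP_EXPR, CODE1/CODE2) followed by int -> double (a widening
      FLOAT_EXPR, CODECVT1/CODECVT2); the intermediate int vector type is
      recorded in INTERM_TYPES and receives its own destination variable
      in VEC_DSTS below.  */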
3926 vec_dsts.create (multi_step_cvt + 1);
3927 vec_dest = vect_create_destination_var (scalar_dest,
3928 (cvt_type && modifier == WIDEN)
3929 ? cvt_type : vectype_out);
3930 vec_dsts.quick_push (vec_dest);
3932 if (multi_step_cvt)
3934 for (i = interm_types.length () - 1;
3935 interm_types.iterate (i, &intermediate_type); i--)
3937 vec_dest = vect_create_destination_var (scalar_dest,
3938 intermediate_type);
3939 vec_dsts.quick_push (vec_dest);
3943 if (cvt_type)
3944 vec_dest = vect_create_destination_var (scalar_dest,
3945 modifier == WIDEN
3946 ? vectype_out : cvt_type);
3948 if (!slp_node)
3950 if (modifier == WIDEN)
3952 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3953 if (op_type == binary_op)
3954 vec_oprnds1.create (1);
3956 else if (modifier == NARROW)
3957 vec_oprnds0.create (
3958 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3960 else if (code == WIDEN_LSHIFT_EXPR)
3961 vec_oprnds1.create (slp_node->vec_stmts_size);
3963 last_oprnd = op0;
3964 prev_stmt_info = NULL;
3965 switch (modifier)
3967 case NONE:
3968 for (j = 0; j < ncopies; j++)
3970 if (j == 0)
3971 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3972 -1);
3973 else
3974 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3976 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3978 /* Arguments are ready. Create the new vector stmt. */
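   /* Added explanatory note (not part of the original source):
      supportable_convert_operation may describe the conversion either as
      a single tree code or as a call to a target builtin; in the latter
      case CODE1 is CALL_EXPR and DECL1 holds the builtin, so a call is
      emitted below instead of an assignment.  */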
3979 if (code1 == CALL_EXPR)
3981 new_stmt = gimple_build_call (decl1, 1, vop0);
3982 new_temp = make_ssa_name (vec_dest, new_stmt);
3983 gimple_call_set_lhs (new_stmt, new_temp);
3985 else
3987 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3988 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3989 new_temp = make_ssa_name (vec_dest, new_stmt);
3990 gimple_assign_set_lhs (new_stmt, new_temp);
3993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3994 if (slp_node)
3995 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3996 else
3998 if (!prev_stmt_info)
3999 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4000 else
4001 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4002 prev_stmt_info = vinfo_for_stmt (new_stmt);
4006 break;
4008 case WIDEN:
4009 /* In case the vectorization factor (VF) is bigger than the number
4010 of elements that we can fit in a vectype (nunits), we have to
4011 generate more than one vector stmt - i.e., we need to "unroll"
4012 the vector stmt by a factor VF/nunits. */
4013 for (j = 0; j < ncopies; j++)
4015 /* Handle uses. */
4016 if (j == 0)
4018 if (slp_node)
4020 if (code == WIDEN_LSHIFT_EXPR)
4022 unsigned int k;
4024 vec_oprnd1 = op1;
4025 /* Store vec_oprnd1 for every vector stmt to be created
4026 for SLP_NODE. We check during the analysis that all
4027 the shift arguments are the same. */
4028 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4029 vec_oprnds1.quick_push (vec_oprnd1);
4031 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4032 slp_node, -1);
4034 else
4035 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4036 &vec_oprnds1, slp_node, -1);
4038 else
4040 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4041 vec_oprnds0.quick_push (vec_oprnd0);
4042 if (op_type == binary_op)
4044 if (code == WIDEN_LSHIFT_EXPR)
4045 vec_oprnd1 = op1;
4046 else
4047 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4048 vec_oprnds1.quick_push (vec_oprnd1);
4052 else
4054 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4055 vec_oprnds0.truncate (0);
4056 vec_oprnds0.quick_push (vec_oprnd0);
4057 if (op_type == binary_op)
4059 if (code == WIDEN_LSHIFT_EXPR)
4060 vec_oprnd1 = op1;
4061 else
4062 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4063 vec_oprnd1);
4064 vec_oprnds1.truncate (0);
4065 vec_oprnds1.quick_push (vec_oprnd1);
4069 /* Arguments are ready. Create the new vector stmts. */
4070 for (i = multi_step_cvt; i >= 0; i--)
4072 tree this_dest = vec_dsts[i];
4073 enum tree_code c1 = code1, c2 = code2;
4074 if (i == 0 && codecvt2 != ERROR_MARK)
4076 c1 = codecvt1;
4077 c2 = codecvt2;
4079 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4080 &vec_oprnds1,
4081 stmt, this_dest, gsi,
4082 c1, c2, decl1, decl2,
4083 op_type);
4086 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4088 if (cvt_type)
4090 if (codecvt1 == CALL_EXPR)
4092 new_stmt = gimple_build_call (decl1, 1, vop0);
4093 new_temp = make_ssa_name (vec_dest, new_stmt);
4094 gimple_call_set_lhs (new_stmt, new_temp);
4096 else
4098 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4099 new_temp = make_ssa_name (vec_dest);
4100 new_stmt = gimple_build_assign (new_temp, codecvt1,
4101 vop0);
4104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4106 else
4107 new_stmt = SSA_NAME_DEF_STMT (vop0);
4109 if (slp_node)
4110 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4111 else
4113 if (!prev_stmt_info)
4114 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4115 else
4116 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4117 prev_stmt_info = vinfo_for_stmt (new_stmt);
4122 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4123 break;
4125 case NARROW:
4126 /* In case the vectorization factor (VF) is bigger than the number
4127 of elements that we can fit in a vectype (nunits), we have to
4128 generate more than one vector stmt - i.e., we need to "unroll"
4129 the vector stmt by a factor VF/nunits. */
4130 for (j = 0; j < ncopies; j++)
4132 /* Handle uses. */
4133 if (slp_node)
4134 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4135 slp_node, -1);
4136 else
4138 vec_oprnds0.truncate (0);
4139 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4140 vect_pow2 (multi_step_cvt) - 1);
4143 /* Arguments are ready. Create the new vector stmts. */
4144 if (cvt_type)
4145 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4147 if (codecvt1 == CALL_EXPR)
4149 new_stmt = gimple_build_call (decl1, 1, vop0);
4150 new_temp = make_ssa_name (vec_dest, new_stmt);
4151 gimple_call_set_lhs (new_stmt, new_temp);
4153 else
4155 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4156 new_temp = make_ssa_name (vec_dest);
4157 new_stmt = gimple_build_assign (new_temp, codecvt1,
4158 vop0);
4161 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4162 vec_oprnds0[i] = new_temp;
4165 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4166 stmt, vec_dsts, gsi,
4167 slp_node, code1,
4168 &prev_stmt_info);
4171 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4172 break;
4175 vec_oprnds0.release ();
4176 vec_oprnds1.release ();
4177 vec_dsts.release ();
4178 interm_types.release ();
4180 return true;
4184 /* Function vectorizable_assignment.
4186 Check if STMT performs an assignment (copy) that can be vectorized.
4187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4188 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4191 static bool
4192 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4193 gimple **vec_stmt, slp_tree slp_node)
4195 tree vec_dest;
4196 tree scalar_dest;
4197 tree op;
4198 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4200 tree new_temp;
4201 gimple *def_stmt;
4202 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4203 int ncopies;
4204 int i, j;
4205 vec<tree> vec_oprnds = vNULL;
4206 tree vop;
4207 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4208 vec_info *vinfo = stmt_info->vinfo;
4209 gimple *new_stmt = NULL;
4210 stmt_vec_info prev_stmt_info = NULL;
4211 enum tree_code code;
4212 tree vectype_in;
4214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4215 return false;
4217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4218 && ! vec_stmt)
4219 return false;
4221 /* Is vectorizable assignment? */
4222 if (!is_gimple_assign (stmt))
4223 return false;
4225 scalar_dest = gimple_assign_lhs (stmt);
4226 if (TREE_CODE (scalar_dest) != SSA_NAME)
4227 return false;
4229 code = gimple_assign_rhs_code (stmt);
4230 if (gimple_assign_single_p (stmt)
4231 || code == PAREN_EXPR
4232 || CONVERT_EXPR_CODE_P (code))
4233 op = gimple_assign_rhs1 (stmt);
4234 else
4235 return false;
4237 if (code == VIEW_CONVERT_EXPR)
4238 op = TREE_OPERAND (op, 0);
4240 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4241 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4243 /* Multiple types in SLP are handled by creating the appropriate number of
4244 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4245 case of SLP. */
4246 if (slp_node)
4247 ncopies = 1;
4248 else
4249 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4251 gcc_assert (ncopies >= 1);
4253 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4255 if (dump_enabled_p ())
4256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4257 "use not simple.\n");
4258 return false;
4261 /* We can handle NOP_EXPR conversions that do not change the number
4262 of elements or the vector size. */
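   /* Added illustrative note (not part of the original source): a NOP_EXPR
      between int and unsigned int, or a VIEW_CONVERT_EXPR between V4SI and
      V4SF, keeps both the number of elements and the vector size, so it is
      treated as a plain copy here.  */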
4263 if ((CONVERT_EXPR_CODE_P (code)
4264 || code == VIEW_CONVERT_EXPR)
4265 && (!vectype_in
4266 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4267 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4268 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4269 return false;
4271 /* We do not handle bit-precision changes. */
4272 if ((CONVERT_EXPR_CODE_P (code)
4273 || code == VIEW_CONVERT_EXPR)
4274 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4275 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4276 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4277 || ((TYPE_PRECISION (TREE_TYPE (op))
4278 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4279 /* But a conversion that does not change the bit-pattern is ok. */
4280 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4281 > TYPE_PRECISION (TREE_TYPE (op)))
4282 && TYPE_UNSIGNED (TREE_TYPE (op)))
4283 /* Conversion between boolean types of different sizes is
4284 a simple assignment in case their vectypes are the same
4285 boolean vector type. */
4286 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4287 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4289 if (dump_enabled_p ())
4290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4291 "type conversion to/from bit-precision "
4292 "unsupported.\n");
4293 return false;
4296 if (!vec_stmt) /* transformation not required. */
4298 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4299 if (dump_enabled_p ())
4300 dump_printf_loc (MSG_NOTE, vect_location,
4301 "=== vectorizable_assignment ===\n");
4302 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4303 return true;
4306 /** Transform. **/
4307 if (dump_enabled_p ())
4308 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4310 /* Handle def. */
4311 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4313 /* Handle use. */
4314 for (j = 0; j < ncopies; j++)
4316 /* Handle uses. */
4317 if (j == 0)
4318 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4319 else
4320 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4322 /* Arguments are ready. Create the new vector stmt. */
4323 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4325 if (CONVERT_EXPR_CODE_P (code)
4326 || code == VIEW_CONVERT_EXPR)
4327 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4328 new_stmt = gimple_build_assign (vec_dest, vop);
4329 new_temp = make_ssa_name (vec_dest, new_stmt);
4330 gimple_assign_set_lhs (new_stmt, new_temp);
4331 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4332 if (slp_node)
4333 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4336 if (slp_node)
4337 continue;
4339 if (j == 0)
4340 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4341 else
4342 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4344 prev_stmt_info = vinfo_for_stmt (new_stmt);
4347 vec_oprnds.release ();
4348 return true;
4352 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4353 either as shift by a scalar or by a vector. */
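   /* Hypothetical usage sketch (added commentary; the caller context and
      variable names are made up, not from the original source):

        tree itype = TREE_TYPE (oprnd);
        if (vect_supportable_shift (RSHIFT_EXPR, itype))
          ... emit a pattern that shifts values of ITYPE ...  */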
4355 bool
4356 vect_supportable_shift (enum tree_code code, tree scalar_type)
4359 machine_mode vec_mode;
4360 optab optab;
4361 int icode;
4362 tree vectype;
4364 vectype = get_vectype_for_scalar_type (scalar_type);
4365 if (!vectype)
4366 return false;
4368 optab = optab_for_tree_code (code, vectype, optab_scalar);
4369 if (!optab
4370 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4372 optab = optab_for_tree_code (code, vectype, optab_vector);
4373 if (!optab
4374 || (optab_handler (optab, TYPE_MODE (vectype))
4375 == CODE_FOR_nothing))
4376 return false;
4379 vec_mode = TYPE_MODE (vectype);
4380 icode = (int) optab_handler (optab, vec_mode);
4381 if (icode == CODE_FOR_nothing)
4382 return false;
4384 return true;
4388 /* Function vectorizable_shift.
4390 Check if STMT performs a shift operation that can be vectorized.
4391 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4392 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4393 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4395 static bool
4396 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4397 gimple **vec_stmt, slp_tree slp_node)
4399 tree vec_dest;
4400 tree scalar_dest;
4401 tree op0, op1 = NULL;
4402 tree vec_oprnd1 = NULL_TREE;
4403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4404 tree vectype;
4405 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4406 enum tree_code code;
4407 machine_mode vec_mode;
4408 tree new_temp;
4409 optab optab;
4410 int icode;
4411 machine_mode optab_op2_mode;
4412 gimple *def_stmt;
4413 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4414 gimple *new_stmt = NULL;
4415 stmt_vec_info prev_stmt_info;
4416 int nunits_in;
4417 int nunits_out;
4418 tree vectype_out;
4419 tree op1_vectype;
4420 int ncopies;
4421 int j, i;
4422 vec<tree> vec_oprnds0 = vNULL;
4423 vec<tree> vec_oprnds1 = vNULL;
4424 tree vop0, vop1;
4425 unsigned int k;
4426 bool scalar_shift_arg = true;
4427 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4428 vec_info *vinfo = stmt_info->vinfo;
4429 int vf;
4431 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4432 return false;
4434 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4435 && ! vec_stmt)
4436 return false;
4438 /* Is STMT a vectorizable binary/unary operation? */
4439 if (!is_gimple_assign (stmt))
4440 return false;
4442 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4443 return false;
4445 code = gimple_assign_rhs_code (stmt);
4447 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4448 || code == RROTATE_EXPR))
4449 return false;
4451 scalar_dest = gimple_assign_lhs (stmt);
4452 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4453 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4454 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4456 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4458 "bit-precision shifts not supported.\n");
4459 return false;
4462 op0 = gimple_assign_rhs1 (stmt);
4463 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4467 "use not simple.\n");
4468 return false;
4470 /* If op0 is an external or constant def use a vector type with
4471 the same size as the output vector type. */
4472 if (!vectype)
4473 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4474 if (vec_stmt)
4475 gcc_assert (vectype);
4476 if (!vectype)
4478 if (dump_enabled_p ())
4479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4480 "no vectype for scalar type\n");
4481 return false;
4484 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4485 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4486 if (nunits_out != nunits_in)
4487 return false;
4489 op1 = gimple_assign_rhs2 (stmt);
4490 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4492 if (dump_enabled_p ())
4493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4494 "use not simple.\n");
4495 return false;
4498 if (loop_vinfo)
4499 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4500 else
4501 vf = 1;
4503 /* Multiple types in SLP are handled by creating the appropriate number of
4504 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4505 case of SLP. */
4506 if (slp_node)
4507 ncopies = 1;
4508 else
4509 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4511 gcc_assert (ncopies >= 1);
4513 /* Determine whether the shift amount is a vector or a scalar. If the
4514 shift/rotate amount is a vector, use the vector/vector shift optabs. */
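   /* Added illustrative note (made-up source snippets, not part of the
      original source): in
        a[i] = b[i] << 3;      or    a[i] = b[i] << n;   (n loop-invariant)
      the shift amount is a scalar and the vector/scalar optab can be used,
      whereas
        a[i] = b[i] << c[i];
      needs the vector/vector optab.  */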
4516 if ((dt[1] == vect_internal_def
4517 || dt[1] == vect_induction_def)
4518 && !slp_node)
4519 scalar_shift_arg = false;
4520 else if (dt[1] == vect_constant_def
4521 || dt[1] == vect_external_def
4522 || dt[1] == vect_internal_def)
4524 /* In SLP, we need to check whether the shift count is the same;
4525 in loops, if it is a constant or invariant, it is always
4526 a scalar shift. */
4527 if (slp_node)
4529 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4530 gimple *slpstmt;
4532 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4533 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4534 scalar_shift_arg = false;
4537 /* If the shift amount is computed by a pattern stmt, we cannot
4538 use the scalar amount directly; give up and use a vector
4539 shift instead. */
4540 if (dt[1] == vect_internal_def)
4542 gimple *def = SSA_NAME_DEF_STMT (op1);
4543 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4544 scalar_shift_arg = false;
4547 else
4549 if (dump_enabled_p ())
4550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4551 "operand mode requires invariant argument.\n");
4552 return false;
4555 /* Vector shifted by vector. */
4556 if (!scalar_shift_arg)
4558 optab = optab_for_tree_code (code, vectype, optab_vector);
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_NOTE, vect_location,
4561 "vector/vector shift/rotate found.\n");
4563 if (!op1_vectype)
4564 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4565 if (op1_vectype == NULL_TREE
4566 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4570 "unusable type for last operand in"
4571 " vector/vector shift/rotate.\n");
4572 return false;
4575 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
4576 whether it has a vector-shifted-by-vector insn. */
4577 else
4579 optab = optab_for_tree_code (code, vectype, optab_scalar);
4580 if (optab
4581 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4583 if (dump_enabled_p ())
4584 dump_printf_loc (MSG_NOTE, vect_location,
4585 "vector/scalar shift/rotate found.\n");
4587 else
4589 optab = optab_for_tree_code (code, vectype, optab_vector);
4590 if (optab
4591 && (optab_handler (optab, TYPE_MODE (vectype))
4592 != CODE_FOR_nothing))
4594 scalar_shift_arg = false;
4596 if (dump_enabled_p ())
4597 dump_printf_loc (MSG_NOTE, vect_location,
4598 "vector/vector shift/rotate found.\n");
4600 /* Unlike the other binary operators, shifts/rotates have
4601 an int rhs rather than one of the same type as the lhs,
4602 so make sure the scalar is the right type if we are
4603 dealing with vectors of long long/long/short/char. */
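   /* Added illustrative note (not part of the original source): for a
      vector of long long shifted by the int constant 3, the fold_convert
      below rewrites op1 as (long long) 3, so its type matches the element
      type of VECTYPE.  */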
4604 if (dt[1] == vect_constant_def)
4605 op1 = fold_convert (TREE_TYPE (vectype), op1);
4606 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4607 TREE_TYPE (op1)))
4609 if (slp_node
4610 && TYPE_MODE (TREE_TYPE (vectype))
4611 != TYPE_MODE (TREE_TYPE (op1)))
4613 if (dump_enabled_p ())
4614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4615 "unusable type for last operand in"
4616 " vector/vector shift/rotate.\n");
4617 return false;
4619 if (vec_stmt && !slp_node)
4621 op1 = fold_convert (TREE_TYPE (vectype), op1);
4622 op1 = vect_init_vector (stmt, op1,
4623 TREE_TYPE (vectype), NULL);
4630 /* Supportable by target? */
4631 if (!optab)
4633 if (dump_enabled_p ())
4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4635 "no optab.\n");
4636 return false;
4638 vec_mode = TYPE_MODE (vectype);
4639 icode = (int) optab_handler (optab, vec_mode);
4640 if (icode == CODE_FOR_nothing)
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "op not supported by target.\n");
4645 /* Check only during analysis. */
4646 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4647 || (vf < vect_min_worthwhile_factor (code)
4648 && !vec_stmt))
4649 return false;
4650 if (dump_enabled_p ())
4651 dump_printf_loc (MSG_NOTE, vect_location,
4652 "proceeding using word mode.\n");
4655 /* Worthwhile without SIMD support? Check only during analysis. */
4656 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4657 && vf < vect_min_worthwhile_factor (code)
4658 && !vec_stmt)
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4662 "not worthwhile without SIMD support.\n");
4663 return false;
4666 if (!vec_stmt) /* transformation not required. */
4668 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_NOTE, vect_location,
4671 "=== vectorizable_shift ===\n");
4672 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4673 return true;
4676 /** Transform. **/
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE, vect_location,
4680 "transform binary/unary operation.\n");
4682 /* Handle def. */
4683 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4685 prev_stmt_info = NULL;
4686 for (j = 0; j < ncopies; j++)
4688 /* Handle uses. */
4689 if (j == 0)
4691 if (scalar_shift_arg)
4693 /* Vector shl and shr insn patterns can be defined with scalar
4694 operand 2 (shift operand). In this case, use constant or loop
4695 invariant op1 directly, without extending it to vector mode
4696 first. */
4697 optab_op2_mode = insn_data[icode].operand[2].mode;
4698 if (!VECTOR_MODE_P (optab_op2_mode))
4700 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_NOTE, vect_location,
4702 "operand 1 using scalar mode.\n");
4703 vec_oprnd1 = op1;
4704 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4705 vec_oprnds1.quick_push (vec_oprnd1);
4706 if (slp_node)
4708 /* Store vec_oprnd1 for every vector stmt to be created
4709 for SLP_NODE. We check during the analysis that all
4710 the shift arguments are the same.
4711 TODO: Allow different constants for different vector
4712 stmts generated for an SLP instance. */
4713 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4714 vec_oprnds1.quick_push (vec_oprnd1);
4719 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4720 (a special case for certain kinds of vector shifts); otherwise,
4721 operand 1 should be of a vector type (the usual case). */
4722 if (vec_oprnd1)
4723 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4724 slp_node, -1);
4725 else
4726 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4727 slp_node, -1);
4729 else
4730 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4732 /* Arguments are ready. Create the new vector stmt. */
4733 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4735 vop1 = vec_oprnds1[i];
4736 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4737 new_temp = make_ssa_name (vec_dest, new_stmt);
4738 gimple_assign_set_lhs (new_stmt, new_temp);
4739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4740 if (slp_node)
4741 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4744 if (slp_node)
4745 continue;
4747 if (j == 0)
4748 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4749 else
4750 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4751 prev_stmt_info = vinfo_for_stmt (new_stmt);
4754 vec_oprnds0.release ();
4755 vec_oprnds1.release ();
4757 return true;
4761 /* Function vectorizable_operation.
4763 Check if STMT performs a binary, unary or ternary operation that can
4764 be vectorized.
4765 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4766 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4767 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4769 static bool
4770 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4771 gimple **vec_stmt, slp_tree slp_node)
4773 tree vec_dest;
4774 tree scalar_dest;
4775 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4776 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4777 tree vectype;
4778 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4779 enum tree_code code;
4780 machine_mode vec_mode;
4781 tree new_temp;
4782 int op_type;
4783 optab optab;
4784 bool target_support_p;
4785 gimple *def_stmt;
4786 enum vect_def_type dt[3]
4787 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4788 gimple *new_stmt = NULL;
4789 stmt_vec_info prev_stmt_info;
4790 int nunits_in;
4791 int nunits_out;
4792 tree vectype_out;
4793 int ncopies;
4794 int j, i;
4795 vec<tree> vec_oprnds0 = vNULL;
4796 vec<tree> vec_oprnds1 = vNULL;
4797 vec<tree> vec_oprnds2 = vNULL;
4798 tree vop0, vop1, vop2;
4799 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4800 vec_info *vinfo = stmt_info->vinfo;
4801 int vf;
4803 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4804 return false;
4806 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4807 && ! vec_stmt)
4808 return false;
4810 /* Is STMT a vectorizable binary/unary operation? */
4811 if (!is_gimple_assign (stmt))
4812 return false;
4814 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4815 return false;
4817 code = gimple_assign_rhs_code (stmt);
4819 /* For pointer addition, we should use the normal plus for
4820 the vector addition. */
4821 if (code == POINTER_PLUS_EXPR)
4822 code = PLUS_EXPR;
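   /* Added illustrative note (made-up SSA names, not part of the original
      source): a statement such as q_3 = p_1 + off_2 (a POINTER_PLUS_EXPR)
      is vectorized with an element-wise PLUS_EXPR on the corresponding
      vectors, since vector addition does not distinguish pointer values
      from integers of the same width.  */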
4824 /* Support only unary or binary operations. */
4825 op_type = TREE_CODE_LENGTH (code);
4826 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4828 if (dump_enabled_p ())
4829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4830 "num. args = %d (not unary/binary/ternary op).\n",
4831 op_type);
4832 return false;
4835 scalar_dest = gimple_assign_lhs (stmt);
4836 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4838 /* Most operations cannot handle bit-precision types without extra
4839 truncations. */
4840 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4841 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4842 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4843 /* Exceptions are bitwise binary operations. */
4844 && code != BIT_IOR_EXPR
4845 && code != BIT_XOR_EXPR
4846 && code != BIT_AND_EXPR)
4848 if (dump_enabled_p ())
4849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4850 "bit-precision arithmetic not supported.\n");
4851 return false;
4854 op0 = gimple_assign_rhs1 (stmt);
4855 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4859 "use not simple.\n");
4860 return false;
4862 /* If op0 is an external or constant def use a vector type with
4863 the same size as the output vector type. */
4864 if (!vectype)
4866 /* For a boolean type we cannot determine the vectype from
4867 an invariant value (we don't know whether it is a vector
4868 of booleans or a vector of integers). We use the output
4869 vectype because operations on booleans don't change the
4870 type. */
4871 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4873 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4877 "not supported operation on bool value.\n");
4878 return false;
4880 vectype = vectype_out;
4882 else
4883 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4885 if (vec_stmt)
4886 gcc_assert (vectype);
4887 if (!vectype)
4889 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4892 "no vectype for scalar type ");
4893 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4894 TREE_TYPE (op0));
4895 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4898 return false;
4901 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4902 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4903 if (nunits_out != nunits_in)
4904 return false;
4906 if (op_type == binary_op || op_type == ternary_op)
4908 op1 = gimple_assign_rhs2 (stmt);
4909 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4911 if (dump_enabled_p ())
4912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4913 "use not simple.\n");
4914 return false;
4917 if (op_type == ternary_op)
4919 op2 = gimple_assign_rhs3 (stmt);
4920 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4922 if (dump_enabled_p ())
4923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4924 "use not simple.\n");
4925 return false;
4929 if (loop_vinfo)
4930 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4931 else
4932 vf = 1;
4934 /* Multiple types in SLP are handled by creating the appropriate number of
4935 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4936 case of SLP. */
4937 if (slp_node)
4938 ncopies = 1;
4939 else
4940 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4942 gcc_assert (ncopies >= 1);
4944 /* Shifts are handled in vectorizable_shift (). */
4945 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4946 || code == RROTATE_EXPR)
4947 return false;
4949 /* Supportable by target? */
4951 vec_mode = TYPE_MODE (vectype);
4952 if (code == MULT_HIGHPART_EXPR)
4953 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4954 else
4956 optab = optab_for_tree_code (code, vectype, optab_default);
4957 if (!optab)
4959 if (dump_enabled_p ())
4960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4961 "no optab.\n");
4962 return false;
4964 target_support_p = (optab_handler (optab, vec_mode)
4965 != CODE_FOR_nothing);
4968 if (!target_support_p)
4970 if (dump_enabled_p ())
4971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4972 "op not supported by target.\n");
4973 /* Check only during analysis. */
4974 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4975 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4976 return false;
4977 if (dump_enabled_p ())
4978 dump_printf_loc (MSG_NOTE, vect_location,
4979 "proceeding using word mode.\n");
4982 /* Worthwhile without SIMD support? Check only during analysis. */
4983 if (!VECTOR_MODE_P (vec_mode)
4984 && !vec_stmt
4985 && vf < vect_min_worthwhile_factor (code))
4987 if (dump_enabled_p ())
4988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4989 "not worthwhile without SIMD support.\n");
4990 return false;
4993 if (!vec_stmt) /* transformation not required. */
4995 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_NOTE, vect_location,
4998 "=== vectorizable_operation ===\n");
4999 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5000 return true;
5003 /** Transform. **/
5005 if (dump_enabled_p ())
5006 dump_printf_loc (MSG_NOTE, vect_location,
5007 "transform binary/unary operation.\n");
5009 /* Handle def. */
5010 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5012 /* In case the vectorization factor (VF) is bigger than the number
5013 of elements that we can fit in a vectype (nunits), we have to generate
5014 more than one vector stmt - i.e., we need to "unroll" the
5015 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5016 from one copy of the vector stmt to the next, in the field
5017 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5018 stages to find the correct vector defs to be used when vectorizing
5019 stmts that use the defs of the current stmt. The example below
5020 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5021 we need to create 4 vectorized stmts):
5023 before vectorization:
5024 RELATED_STMT VEC_STMT
5025 S1: x = memref - -
5026 S2: z = x + 1 - -
5028 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5029 there):
5030 RELATED_STMT VEC_STMT
5031 VS1_0: vx0 = memref0 VS1_1 -
5032 VS1_1: vx1 = memref1 VS1_2 -
5033 VS1_2: vx2 = memref2 VS1_3 -
5034 VS1_3: vx3 = memref3 - -
5035 S1: x = load - VS1_0
5036 S2: z = x + 1 - -
5038 step 2: vectorize stmt S2 (done here):
5039 To vectorize stmt S2 we first need to find the relevant vector
5040 def for the first operand 'x'. This is, as usual, obtained from
5041 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5042 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5043 relevant vector def 'vx0'. Having found 'vx0' we can generate
5044 the vector stmt VS2_0, and as usual, record it in the
5045 STMT_VINFO_VEC_STMT of stmt S2.
5046 When creating the second copy (VS2_1), we obtain the relevant vector
5047 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5048 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5049 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5050 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5051 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5052 chain of stmts and pointers:
5053 RELATED_STMT VEC_STMT
5054 VS1_0: vx0 = memref0 VS1_1 -
5055 VS1_1: vx1 = memref1 VS1_2 -
5056 VS1_2: vx2 = memref2 VS1_3 -
5057 VS1_3: vx3 = memref3 - -
5058 S1: x = load - VS1_0
5059 VS2_0: vz0 = vx0 + v1 VS2_1 -
5060 VS2_1: vz1 = vx1 + v1 VS2_2 -
5061 VS2_2: vz2 = vx2 + v1 VS2_3 -
5062 VS2_3: vz3 = vx3 + v1 - -
5063 S2: z = x + 1 - VS2_0 */
5065 prev_stmt_info = NULL;
5066 for (j = 0; j < ncopies; j++)
5068 /* Handle uses. */
5069 if (j == 0)
5071 if (op_type == binary_op || op_type == ternary_op)
5072 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5073 slp_node, -1);
5074 else
5075 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5076 slp_node, -1);
5077 if (op_type == ternary_op)
5079 vec_oprnds2.create (1);
5080 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5081 stmt));
5084 else
5086 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5087 if (op_type == ternary_op)
5089 tree vec_oprnd = vec_oprnds2.pop ();
5090 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5091 vec_oprnd));
5095 /* Arguments are ready. Create the new vector stmt. */
5096 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5098 vop1 = ((op_type == binary_op || op_type == ternary_op)
5099 ? vec_oprnds1[i] : NULL_TREE);
5100 vop2 = ((op_type == ternary_op)
5101 ? vec_oprnds2[i] : NULL_TREE);
5102 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5103 new_temp = make_ssa_name (vec_dest, new_stmt);
5104 gimple_assign_set_lhs (new_stmt, new_temp);
5105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5106 if (slp_node)
5107 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5110 if (slp_node)
5111 continue;
5113 if (j == 0)
5114 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5115 else
5116 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5117 prev_stmt_info = vinfo_for_stmt (new_stmt);
5120 vec_oprnds0.release ();
5121 vec_oprnds1.release ();
5122 vec_oprnds2.release ();
5124 return true;
5127 /* A helper function to ensure data reference DR's base alignment
5128 for STMT_INFO. */
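   /* Added illustrative note (not part of the original source): if the
      base object of DR is, say, a static array whose declared alignment is
      below TYPE_ALIGN of the statement's vectype, the code below raises
      its alignment -- through symtab_node::increase_alignment for
      declarations in the symbol table, or via SET_DECL_ALIGN and
      DECL_USER_ALIGN otherwise -- and clears base_misaligned.  */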
5130 static void
5131 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5133 if (!dr->aux)
5134 return;
5136 if (DR_VECT_AUX (dr)->base_misaligned)
5138 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5139 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5141 if (decl_in_symtab_p (base_decl))
5142 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5143 else
5145 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5146 DECL_USER_ALIGN (base_decl) = 1;
5148 DR_VECT_AUX (dr)->base_misaligned = false;
5153 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
5154 reversal of the vector elements. If that is impossible to do,
5155 return NULL. */
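   /* Added illustrative note: for a 4-element vector the selector built
      below is { 3, 2, 1, 0 }, so the last element of the input becomes
      the first element of the result.  */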
5157 static tree
5158 perm_mask_for_reverse (tree vectype)
5160 int i, nunits;
5161 unsigned char *sel;
5163 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5164 sel = XALLOCAVEC (unsigned char, nunits);
5166 for (i = 0; i < nunits; ++i)
5167 sel[i] = nunits - 1 - i;
5169 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5170 return NULL_TREE;
5171 return vect_gen_perm_mask_checked (vectype, sel);
5174 /* Function vectorizable_store.
5176 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5177 can be vectorized.
5178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5179 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5182 static bool
5183 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5184 slp_tree slp_node)
5186 tree scalar_dest;
5187 tree data_ref;
5188 tree op;
5189 tree vec_oprnd = NULL_TREE;
5190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5191 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5192 tree elem_type;
5193 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5194 struct loop *loop = NULL;
5195 machine_mode vec_mode;
5196 tree dummy;
5197 enum dr_alignment_support alignment_support_scheme;
5198 gimple *def_stmt;
5199 enum vect_def_type dt;
5200 stmt_vec_info prev_stmt_info = NULL;
5201 tree dataref_ptr = NULL_TREE;
5202 tree dataref_offset = NULL_TREE;
5203 gimple *ptr_incr = NULL;
5204 int ncopies;
5205 int j;
5206 gimple *next_stmt, *first_stmt = NULL;
5207 bool grouped_store = false;
5208 bool store_lanes_p = false;
5209 unsigned int group_size, i;
5210 vec<tree> dr_chain = vNULL;
5211 vec<tree> oprnds = vNULL;
5212 vec<tree> result_chain = vNULL;
5213 bool inv_p;
5214 bool negative = false;
5215 tree offset = NULL_TREE;
5216 vec<tree> vec_oprnds = vNULL;
5217 bool slp = (slp_node != NULL);
5218 unsigned int vec_num;
5219 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5220 vec_info *vinfo = stmt_info->vinfo;
5221 tree aggr_type;
5222 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5223 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5224 int scatter_scale = 1;
5225 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5226 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5227 gimple *new_stmt;
5229 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5230 return false;
5232 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5233 && ! vec_stmt)
5234 return false;
5236 /* Is vectorizable store? */
5238 if (!is_gimple_assign (stmt))
5239 return false;
5241 scalar_dest = gimple_assign_lhs (stmt);
5242 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5243 && is_pattern_stmt_p (stmt_info))
5244 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5245 if (TREE_CODE (scalar_dest) != ARRAY_REF
5246 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5247 && TREE_CODE (scalar_dest) != INDIRECT_REF
5248 && TREE_CODE (scalar_dest) != COMPONENT_REF
5249 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5250 && TREE_CODE (scalar_dest) != REALPART_EXPR
5251 && TREE_CODE (scalar_dest) != MEM_REF)
5252 return false;
5254 /* Cannot have hybrid store SLP -- that would mean storing to the
5255 same location twice. */
5256 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5258 gcc_assert (gimple_assign_single_p (stmt));
5260 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5261 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5263 if (loop_vinfo)
5264 loop = LOOP_VINFO_LOOP (loop_vinfo);
5266 /* Multiple types in SLP are handled by creating the appropriate number of
5267 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5268 case of SLP. */
5269 if (slp)
5270 ncopies = 1;
5271 else
5272 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5274 gcc_assert (ncopies >= 1);
5276 /* FORNOW. This restriction should be relaxed. */
5277 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5281 "multiple types in nested loop.\n");
5282 return false;
5285 op = gimple_assign_rhs1 (stmt);
5287 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5289 if (dump_enabled_p ())
5290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5291 "use not simple.\n");
5292 return false;
5295 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5296 return false;
5298 elem_type = TREE_TYPE (vectype);
5299 vec_mode = TYPE_MODE (vectype);
5301 /* FORNOW. In some cases we can vectorize even if the data type is
5302 not supported (e.g. array initialization with 0). */
5303 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5304 return false;
5306 if (!STMT_VINFO_DATA_REF (stmt_info))
5307 return false;
5309 if (!STMT_VINFO_STRIDED_P (stmt_info))
5311 negative =
5312 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5313 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5314 size_zero_node) < 0;
5315 if (negative && ncopies > 1)
5317 if (dump_enabled_p ())
5318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5319 "multiple types with negative step.\n");
5320 return false;
5322 if (negative)
5324 gcc_assert (!grouped_store);
5325 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5326 if (alignment_support_scheme != dr_aligned
5327 && alignment_support_scheme != dr_unaligned_supported)
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5331 "negative step but alignment required.\n");
5332 return false;
5334 if (dt != vect_constant_def
5335 && dt != vect_external_def
5336 && !perm_mask_for_reverse (vectype))
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5340 "negative step and reversing not supported.\n");
5341 return false;
5346 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5348 grouped_store = true;
5349 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5350 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5351 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
5353 if (vect_store_lanes_supported (vectype, group_size))
5354 store_lanes_p = true;
5355 else if (!vect_grouped_store_supported (vectype, group_size))
5356 return false;
5359 if (STMT_VINFO_STRIDED_P (stmt_info)
5360 && slp
5361 && (group_size > nunits
5362 || nunits % group_size != 0))
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365 "unhandled strided group store\n");
5366 return false;
5369 if (first_stmt == stmt)
5371 /* STMT is the leader of the group. Check the operands of all the
5372 stmts of the group. */
5373 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5374 while (next_stmt)
5376 gcc_assert (gimple_assign_single_p (next_stmt));
5377 op = gimple_assign_rhs1 (next_stmt);
5378 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "use not simple.\n");
5383 return false;
5385 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5390 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5392 gimple *def_stmt;
5393 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5394 &scatter_off, &scatter_scale);
5395 gcc_assert (scatter_decl);
5396 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5397 &scatter_off_vectype))
5399 if (dump_enabled_p ())
5400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5401 "scatter index use not simple.");
5402 return false;
5406 if (!vec_stmt) /* transformation not required. */
5408 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5409 /* The SLP costs are calculated during SLP analysis. */
5410 if (!PURE_SLP_STMT (stmt_info))
5411 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5412 NULL, NULL, NULL);
5413 return true;
5416 /** Transform. **/
5418 ensure_base_align (stmt_info, dr);
5420 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5422 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5423 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5424 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5425 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5426 edge pe = loop_preheader_edge (loop);
5427 gimple_seq seq;
5428 basic_block new_bb;
5429 enum { NARROW, NONE, WIDEN } modifier;
5430 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5432 if (nunits == (unsigned int) scatter_off_nunits)
5433 modifier = NONE;
5434 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5436 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5437 modifier = WIDEN;
5439 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5440 sel[i] = i | nunits;
5442 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5443 gcc_assert (perm_mask != NULL_TREE);
5445 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5447 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5448 modifier = NARROW;
5450 for (i = 0; i < (unsigned int) nunits; ++i)
5451 sel[i] = i | scatter_off_nunits;
5453 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5454 gcc_assert (perm_mask != NULL_TREE);
5455 ncopies *= 2;
5457 else
5458 gcc_unreachable ();
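   /* Added illustrative note (the vector modes are target-dependent
      assumptions, not part of the original source): with V4DI data and
      V8SI offsets, nunits (4) equals scatter_off_nunits / 2, so modifier
      becomes WIDEN; with V8SI data and V4DI offsets, nunits equals
      scatter_off_nunits * 2, so modifier becomes NARROW and twice as many
      copies are generated.  */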
5460 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5461 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5462 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5463 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5464 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5465 scaletype = TREE_VALUE (arglist);
5467 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5468 && TREE_CODE (rettype) == VOID_TYPE);
5470 ptr = fold_convert (ptrtype, scatter_base);
5471 if (!is_gimple_min_invariant (ptr))
5473 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5474 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5475 gcc_assert (!new_bb);
5478 /* Currently we support only unconditional scatter stores,
5479 so the mask should be all ones. */
5480 mask = build_int_cst (masktype, -1);
5481 mask = vect_init_vector (stmt, mask, masktype, NULL);
5483 scale = build_int_cst (scaletype, scatter_scale);
5485 prev_stmt_info = NULL;
5486 for (j = 0; j < ncopies; ++j)
5488 if (j == 0)
5490 src = vec_oprnd1
5491 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5492 op = vec_oprnd0
5493 = vect_get_vec_def_for_operand (scatter_off, stmt);
5495 else if (modifier != NONE && (j & 1))
5497 if (modifier == WIDEN)
5499 src = vec_oprnd1
5500 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5501 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5502 stmt, gsi);
5504 else if (modifier == NARROW)
5506 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5507 stmt, gsi);
5508 op = vec_oprnd0
5509 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5511 else
5512 gcc_unreachable ();
5514 else
5516 src = vec_oprnd1
5517 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5518 op = vec_oprnd0
5519 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5522 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5524 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5525 == TYPE_VECTOR_SUBPARTS (srctype));
5526 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5527 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5528 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5530 src = var;
5533 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5535 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5536 == TYPE_VECTOR_SUBPARTS (idxtype));
5537 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5538 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5539 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5541 op = var;
5544 new_stmt
5545 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5549 if (prev_stmt_info == NULL)
5550 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5551 else
5552 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5553 prev_stmt_info = vinfo_for_stmt (new_stmt);
5555 return true;
5558 if (grouped_store)
5560 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5561 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5563 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5565 /* FORNOW */
5566 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5568 /* We vectorize all the stmts of the interleaving group when we
5569 reach the last stmt in the group. */
5570 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5571 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5572 && !slp)
5574 *vec_stmt = NULL;
5575 return true;
5578 if (slp)
5580 grouped_store = false;
5581 /* VEC_NUM is the number of vect stmts to be created for this
5582 group. */
5583 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5584 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5585 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5586 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5587 op = gimple_assign_rhs1 (first_stmt);
5589 else
5590 /* VEC_NUM is the number of vect stmts to be created for this
5591 group. */
5592 vec_num = group_size;
5594 else
5596 first_stmt = stmt;
5597 first_dr = dr;
5598 group_size = vec_num = 1;
5601 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_NOTE, vect_location,
5603 "transform store. ncopies = %d\n", ncopies);
5605 if (STMT_VINFO_STRIDED_P (stmt_info))
5607 gimple_stmt_iterator incr_gsi;
5608 bool insert_after;
5609 gimple *incr;
5610 tree offvar;
5611 tree ivstep;
5612 tree running_off;
5613 gimple_seq stmts = NULL;
5614 tree stride_base, stride_step, alias_off;
5615 tree vec_oprnd;
5616 unsigned int g;
5618 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5620 stride_base
5621 = fold_build_pointer_plus
5622 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5623 size_binop (PLUS_EXPR,
5624 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5625 convert_to_ptrofftype (DR_INIT (first_dr))));
5626 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5628 /* For a store with loop-invariant (but other than power-of-2)
5629 stride (i.e. not a grouped access) like so:
5631 for (i = 0; i < n; i += stride)
5632 array[i] = ...;
5634 we generate a new induction variable and new stores from
5635 the components of the (vectorized) rhs:
5637 for (j = 0; ; j += VF*stride)
5638 vectemp = ...;
5639 tmp1 = vectemp[0];
5640 array[j] = tmp1;
5641 tmp2 = vectemp[1];
5642 array[j + stride] = tmp2;
5646 unsigned nstores = nunits;
5647 tree ltype = elem_type;
5648 if (slp)
5650 nstores = nunits / group_size;
5651 if (group_size < nunits)
5652 ltype = build_vector_type (elem_type, group_size);
5653 else
5654 ltype = vectype;
5655 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5656 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5657 group_size = 1;
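/* Create an IV that steps the store address by STRIDE_STEP times the
   number of scalar stores emitted per vectorized iteration
   (ncopies * nstores); the stores below advance a local copy of it by
   STRIDE_STEP between elements.  */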
5660 ivstep = stride_step;
5661 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5662 build_int_cst (TREE_TYPE (ivstep),
5663 ncopies * nstores));
5665 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5667 create_iv (stride_base, ivstep, NULL,
5668 loop, &incr_gsi, insert_after,
5669 &offvar, NULL);
5670 incr = gsi_stmt (incr_gsi);
5671 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5673 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5674 if (stmts)
5675 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5677 prev_stmt_info = NULL;
5678 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5679 next_stmt = first_stmt;
5680 for (g = 0; g < group_size; g++)
5682 running_off = offvar;
5683 if (g)
5685 tree size = TYPE_SIZE_UNIT (ltype);
5686 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5687 size);
5688 tree newoff = copy_ssa_name (running_off, NULL);
5689 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5690 running_off, pos);
5691 vect_finish_stmt_generation (stmt, incr, gsi);
5692 running_off = newoff;
5694 for (j = 0; j < ncopies; j++)
5696 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5697 and first_stmt == stmt. */
5698 if (j == 0)
5700 if (slp)
5702 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5703 slp_node, -1);
5704 vec_oprnd = vec_oprnds[0];
5706 else
5708 gcc_assert (gimple_assign_single_p (next_stmt));
5709 op = gimple_assign_rhs1 (next_stmt);
5710 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5713 else
5715 if (slp)
5716 vec_oprnd = vec_oprnds[j];
5717 else
5719 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5720 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5724 for (i = 0; i < nstores; i++)
5726 tree newref, newoff;
5727 gimple *incr, *assign;
5728 tree size = TYPE_SIZE (ltype);
5729 /* Extract the i'th component. */
5730 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5731 bitsize_int (i), size);
5732 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5733 size, pos);
5735 elem = force_gimple_operand_gsi (gsi, elem, true,
5736 NULL_TREE, true,
5737 GSI_SAME_STMT);
5739 newref = build2 (MEM_REF, ltype,
5740 running_off, alias_off);
5742 /* And store it to *running_off. */
5743 assign = gimple_build_assign (newref, elem);
5744 vect_finish_stmt_generation (stmt, assign, gsi);
5746 newoff = copy_ssa_name (running_off, NULL);
5747 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5748 running_off, stride_step);
5749 vect_finish_stmt_generation (stmt, incr, gsi);
5751 running_off = newoff;
5752 if (g == group_size - 1
5753 && !slp)
5755 if (j == 0 && i == 0)
5756 STMT_VINFO_VEC_STMT (stmt_info)
5757 = *vec_stmt = assign;
5758 else
5759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5760 prev_stmt_info = vinfo_for_stmt (assign);
5764 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5766 return true;
5769 dr_chain.create (group_size);
5770 oprnds.create (group_size);
5772 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5773 gcc_assert (alignment_support_scheme);
5774 /* Targets with store-lane instructions must not require explicit
5775 realignment. */
5776 gcc_assert (!store_lanes_p
5777 || alignment_support_scheme == dr_aligned
5778 || alignment_support_scheme == dr_unaligned_supported);
5780 if (negative)
5781 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5783 if (store_lanes_p)
5784 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5785 else
5786 aggr_type = vectype;
5788 /* In case the vectorization factor (VF) is bigger than the number
5789 of elements that we can fit in a vectype (nunits), we have to generate
5790 more than one vector stmt - i.e - we need to "unroll" the
5791 vector stmt by a factor VF/nunits. For more details see documentation in
5792 vect_get_vec_def_for_copy_stmt. */
5794 /* In case of interleaving (non-unit grouped access):
5796 S1: &base + 2 = x2
5797 S2: &base = x0
5798 S3: &base + 1 = x1
5799 S4: &base + 3 = x3
5801 We create vectorized stores starting from base address (the access of the
5802 first stmt in the chain (S2 in the above example), when the last store stmt
5803 of the chain (S4) is reached:
5805 VS1: &base = vx2
5806 VS2: &base + vec_size*1 = vx0
5807 VS3: &base + vec_size*2 = vx1
5808 VS4: &base + vec_size*3 = vx3
5810 Then permutation statements are generated:
5812 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5813 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5816 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5817 (the order of the data-refs in the output of vect_permute_store_chain
5818 corresponds to the order of scalar stmts in the interleaving chain - see
5819 the documentation of vect_permute_store_chain()).
5821 In case of both multiple types and interleaving, above vector stores and
5822 permutation stmts are created for every copy. The result vector stmts are
5823 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5824 STMT_VINFO_RELATED_STMT for the next copies.
5827 prev_stmt_info = NULL;
5828 for (j = 0; j < ncopies; j++)
5831 if (j == 0)
5833 if (slp)
5835 /* Get vectorized arguments for SLP_NODE. */
5836 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5837 NULL, slp_node, -1);
5839 vec_oprnd = vec_oprnds[0];
5841 else
5843 /* For interleaved stores we collect vectorized defs for all the
5844 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5845 used as an input to vect_permute_store_chain(), and OPRNDS as
5846 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5848 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5849 OPRNDS are of size 1. */
5850 next_stmt = first_stmt;
5851 for (i = 0; i < group_size; i++)
5853 /* Since gaps are not supported for interleaved stores,
5854 GROUP_SIZE is the exact number of stmts in the chain.
5855 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5856 there is no interleaving, GROUP_SIZE is 1, and only one
5857 iteration of the loop will be executed. */
5858 gcc_assert (next_stmt
5859 && gimple_assign_single_p (next_stmt));
5860 op = gimple_assign_rhs1 (next_stmt);
5862 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5863 dr_chain.quick_push (vec_oprnd);
5864 oprnds.quick_push (vec_oprnd);
5865 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5869 /* We should have caught mismatched types earlier. */
5870 gcc_assert (useless_type_conversion_p (vectype,
5871 TREE_TYPE (vec_oprnd)));
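/* For a SIMD-lane access to a plain local array with zero offset and
   init (and a matching alias set) the DR base address itself is a valid
   invariant data pointer, so use it directly instead of creating a new
   data-ref pointer.  */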
5872 bool simd_lane_access_p
5873 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5874 if (simd_lane_access_p
5875 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5876 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5877 && integer_zerop (DR_OFFSET (first_dr))
5878 && integer_zerop (DR_INIT (first_dr))
5879 && alias_sets_conflict_p (get_alias_set (aggr_type),
5880 get_alias_set (DR_REF (first_dr))))
5882 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5883 dataref_offset = build_int_cst (reference_alias_ptr_type
5884 (DR_REF (first_dr)), 0);
5885 inv_p = false;
5887 else
5888 dataref_ptr
5889 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5890 simd_lane_access_p ? loop : NULL,
5891 offset, &dummy, gsi, &ptr_incr,
5892 simd_lane_access_p, &inv_p);
5893 gcc_assert (bb_vinfo || !inv_p);
5895 else
5897 /* For interleaved stores we created vectorized defs for all the
5898 defs stored in OPRNDS in the previous iteration (previous copy).
5899 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5900 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5901 next copy.
5902 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5903 OPRNDS are of size 1. */
5904 for (i = 0; i < group_size; i++)
5906 op = oprnds[i];
5907 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5908 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5909 dr_chain[i] = vec_oprnd;
5910 oprnds[i] = vec_oprnd;
5912 if (dataref_offset)
5913 dataref_offset
5914 = int_const_binop (PLUS_EXPR, dataref_offset,
5915 TYPE_SIZE_UNIT (aggr_type));
5916 else
5917 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5918 TYPE_SIZE_UNIT (aggr_type));
5921 if (store_lanes_p)
5923 tree vec_array;
5925 /* Combine all the vectors into an array. */
5926 vec_array = create_vector_array (vectype, vec_num);
5927 for (i = 0; i < vec_num; i++)
5929 vec_oprnd = dr_chain[i];
5930 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5933 /* Emit:
5934 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5935 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5936 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5937 gimple_call_set_lhs (new_stmt, data_ref);
5938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5940 else
5942 new_stmt = NULL;
5943 if (grouped_store)
5945 if (j == 0)
5946 result_chain.create (group_size);
5947 /* Permute. */
5948 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5949 &result_chain);
5952 next_stmt = first_stmt;
5953 for (i = 0; i < vec_num; i++)
5955 unsigned align, misalign;
5957 if (i > 0)
5958 /* Bump the vector pointer. */
5959 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5960 stmt, NULL_TREE);
5962 if (slp)
5963 vec_oprnd = vec_oprnds[i];
5964 else if (grouped_store)
5965 /* For grouped stores vectorized defs are interleaved in
5966 vect_permute_store_chain(). */
5967 vec_oprnd = result_chain[i];
5969 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5970 dataref_ptr,
5971 dataref_offset
5972 ? dataref_offset
5973 : build_int_cst (reference_alias_ptr_type
5974 (DR_REF (first_dr)), 0));
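/* Compute the alignment to record on the store reference: the full
   vector alignment when the access is known to be aligned, the element
   (or known object) alignment when the misalignment is unknown (-1),
   and otherwise the element alignment plus the known misalignment.  */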
5975 align = TYPE_ALIGN_UNIT (vectype);
5976 if (aligned_access_p (first_dr))
5977 misalign = 0;
5978 else if (DR_MISALIGNMENT (first_dr) == -1)
5980 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5981 align = TYPE_ALIGN_UNIT (elem_type);
5982 else
5983 align = get_object_alignment (DR_REF (first_dr))
5984 / BITS_PER_UNIT;
5985 misalign = 0;
5986 TREE_TYPE (data_ref)
5987 = build_aligned_type (TREE_TYPE (data_ref),
5988 align * BITS_PER_UNIT);
5990 else
5992 TREE_TYPE (data_ref)
5993 = build_aligned_type (TREE_TYPE (data_ref),
5994 TYPE_ALIGN (elem_type));
5995 misalign = DR_MISALIGNMENT (first_dr);
5997 if (dataref_offset == NULL_TREE
5998 && TREE_CODE (dataref_ptr) == SSA_NAME)
5999 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6000 misalign);
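/* A negative step lays the vector elements out in reverse memory
   order, so reverse the vector with a VEC_PERM_EXPR before storing
   it.  */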
6002 if (negative
6003 && dt != vect_constant_def
6004 && dt != vect_external_def)
6006 tree perm_mask = perm_mask_for_reverse (vectype);
6007 tree perm_dest
6008 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6009 vectype);
6010 tree new_temp = make_ssa_name (perm_dest);
6012 /* Generate the permute statement. */
6013 gimple *perm_stmt
6014 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6015 vec_oprnd, perm_mask);
6016 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6018 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6019 vec_oprnd = new_temp;
6022 /* Arguments are ready. Create the new vector stmt. */
6023 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6026 if (slp)
6027 continue;
6029 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6030 if (!next_stmt)
6031 break;
6034 if (!slp)
6036 if (j == 0)
6037 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6038 else
6039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6040 prev_stmt_info = vinfo_for_stmt (new_stmt);
6044 dr_chain.release ();
6045 oprnds.release ();
6046 result_chain.release ();
6047 vec_oprnds.release ();
6049 return true;
6052 /* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
6053 VECTOR_CST mask. No checks are made that the target platform supports the
6054 mask, so callers may wish to test can_vec_perm_p separately, or use
6055 vect_gen_perm_mask_checked. */
6057 tree
6058 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6060 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6061 int i, nunits;
6063 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6065 mask_elt_type = lang_hooks.types.type_for_mode
6066 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6067 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6069 mask_elts = XALLOCAVEC (tree, nunits);
6070 for (i = nunits - 1; i >= 0; i--)
6071 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6072 mask_vec = build_vector (mask_type, mask_elts);
6074 return mask_vec;
6077 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6078 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6080 tree
6081 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6083 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6084 return vect_gen_perm_mask_any (vectype, sel);
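/* For example (illustrative sketch only), a caller wanting to reverse
   the elements of an 8-element vector, assuming VECTYPE is that vector
   type, could build the selector { 7, 6, ..., 0 } and request the
   checked mask; perm_mask_for_reverse does essentially this for the
   negative-step paths below:

     unsigned char sel[8];
     for (unsigned int i = 0; i < 8; ++i)
       sel[i] = 8 - 1 - i;
     tree mask = vect_gen_perm_mask_checked (vectype, sel);  */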
6087 /* Given vector variables X and Y that were generated for the scalar
6088 STMT, generate instructions to permute the vector elements of X and Y
6089 using permutation mask MASK_VEC, insert them at *GSI and return the
6090 permuted vector variable. */
6092 static tree
6093 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6094 gimple_stmt_iterator *gsi)
6096 tree vectype = TREE_TYPE (x);
6097 tree perm_dest, data_ref;
6098 gimple *perm_stmt;
6100 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6101 data_ref = make_ssa_name (perm_dest);
6103 /* Generate the permute statement. */
6104 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6105 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6107 return data_ref;
6110 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6111 inserting them on the loop's preheader edge. Returns true if we
6112 were successful in doing so (and thus STMT itself can then be moved),
6113 otherwise returns false. */
6115 static bool
6116 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6118 ssa_op_iter i;
6119 tree op;
6120 bool any = false;
6122 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6124 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6125 if (!gimple_nop_p (def_stmt)
6126 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6128 /* Make sure we don't need to recurse. While we could do
6129 so in simple cases, for more complex use webs we don't have
6130 an easy way to preserve stmt order so as to fulfil
6131 dependencies within them. */
6132 tree op2;
6133 ssa_op_iter i2;
6134 if (gimple_code (def_stmt) == GIMPLE_PHI)
6135 return false;
6136 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6138 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6139 if (!gimple_nop_p (def_stmt2)
6140 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6141 return false;
6143 any = true;
6147 if (!any)
6148 return true;
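/* All defs can be hoisted: move each in-loop definition of an operand
   of STMT onto the preheader edge.  */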
6150 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6152 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6153 if (!gimple_nop_p (def_stmt)
6154 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6156 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6157 gsi_remove (&gsi, false);
6158 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6162 return true;
6165 /* vectorizable_load.
6167 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6168 can be vectorized.
6169 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6170 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6171 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6173 static bool
6174 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6175 slp_tree slp_node, slp_instance slp_node_instance)
6177 tree scalar_dest;
6178 tree vec_dest = NULL;
6179 tree data_ref = NULL;
6180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6181 stmt_vec_info prev_stmt_info;
6182 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6183 struct loop *loop = NULL;
6184 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6185 bool nested_in_vect_loop = false;
6186 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6187 tree elem_type;
6188 tree new_temp;
6189 machine_mode mode;
6190 gimple *new_stmt = NULL;
6191 tree dummy;
6192 enum dr_alignment_support alignment_support_scheme;
6193 tree dataref_ptr = NULL_TREE;
6194 tree dataref_offset = NULL_TREE;
6195 gimple *ptr_incr = NULL;
6196 int ncopies;
6197 int i, j, group_size = -1, group_gap_adj;
6198 tree msq = NULL_TREE, lsq;
6199 tree offset = NULL_TREE;
6200 tree byte_offset = NULL_TREE;
6201 tree realignment_token = NULL_TREE;
6202 gphi *phi = NULL;
6203 vec<tree> dr_chain = vNULL;
6204 bool grouped_load = false;
6205 bool load_lanes_p = false;
6206 gimple *first_stmt;
6207 gimple *first_stmt_for_drptr = NULL;
6208 bool inv_p;
6209 bool negative = false;
6210 bool compute_in_loop = false;
6211 struct loop *at_loop;
6212 int vec_num;
6213 bool slp = (slp_node != NULL);
6214 bool slp_perm = false;
6215 enum tree_code code;
6216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6217 int vf;
6218 tree aggr_type;
6219 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6220 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6221 int gather_scale = 1;
6222 enum vect_def_type gather_dt = vect_unknown_def_type;
6223 vec_info *vinfo = stmt_info->vinfo;
6225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6226 return false;
6228 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6229 && ! vec_stmt)
6230 return false;
6232 /* Is vectorizable load? */
6233 if (!is_gimple_assign (stmt))
6234 return false;
6236 scalar_dest = gimple_assign_lhs (stmt);
6237 if (TREE_CODE (scalar_dest) != SSA_NAME)
6238 return false;
6240 code = gimple_assign_rhs_code (stmt);
6241 if (code != ARRAY_REF
6242 && code != BIT_FIELD_REF
6243 && code != INDIRECT_REF
6244 && code != COMPONENT_REF
6245 && code != IMAGPART_EXPR
6246 && code != REALPART_EXPR
6247 && code != MEM_REF
6248 && TREE_CODE_CLASS (code) != tcc_declaration)
6249 return false;
6251 if (!STMT_VINFO_DATA_REF (stmt_info))
6252 return false;
6254 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6255 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6257 if (loop_vinfo)
6259 loop = LOOP_VINFO_LOOP (loop_vinfo);
6260 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6261 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6263 else
6264 vf = 1;
6266 /* Multiple types in SLP are handled by creating the appropriate number of
6267 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6268 case of SLP. */
6269 if (slp)
6270 ncopies = 1;
6271 else
6272 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6274 gcc_assert (ncopies >= 1);
6276 /* FORNOW. This restriction should be relaxed. */
6277 if (nested_in_vect_loop && ncopies > 1)
6279 if (dump_enabled_p ())
6280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6281 "multiple types in nested loop.\n");
6282 return false;
6285 /* Invalidate assumptions made by dependence analysis when vectorization
6286 on the unrolled body effectively re-orders stmts. */
6287 if (ncopies > 1
6288 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6289 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6290 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6292 if (dump_enabled_p ())
6293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6294 "cannot perform implicit CSE when unrolling "
6295 "with negative dependence distance\n");
6296 return false;
6299 elem_type = TREE_TYPE (vectype);
6300 mode = TYPE_MODE (vectype);
6302 /* FORNOW. In some cases can vectorize even if data-type not supported
6303 (e.g. - data copies). */
6304 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6308 "Aligned load, but unsupported type.\n");
6309 return false;
6312 /* Check if the load is a part of an interleaving chain. */
6313 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6315 grouped_load = true;
6316 /* FORNOW */
6317 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6319 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6320 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6322 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
6324 if (vect_load_lanes_supported (vectype, group_size))
6325 load_lanes_p = true;
6326 else if (!vect_grouped_load_supported (vectype, group_size))
6327 return false;
6330 /* If this is single-element interleaving with an element distance
6331 that leaves unused vector loads around, punt - we at least create
6332 very sub-optimal code in that case (and blow up memory,
6333 see PR65518). */
6334 if (first_stmt == stmt
6335 && !GROUP_NEXT_ELEMENT (stmt_info))
6337 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6339 if (dump_enabled_p ())
6340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6341 "single-element interleaving not supported "
6342 "for not adjacent vector loads\n");
6343 return false;
6346 /* Single-element interleaving requires peeling for gaps. */
6347 gcc_assert (GROUP_GAP (stmt_info));
6350 /* If there is a gap at the end of the group or the group size cannot
6351 be made a multiple of the vector element count then we access excess
6352 elements in the last iteration and thus need to peel that off. */
6353 if (loop_vinfo
6354 && ! STMT_VINFO_STRIDED_P (stmt_info)
6355 && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6356 || (!slp && !load_lanes_p && vf % group_size != 0)))
6358 if (dump_enabled_p ())
6359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6360 "Data access with gaps requires scalar "
6361 "epilogue loop\n");
6362 if (loop->inner)
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6366 "Peeling for outer loop is not supported\n");
6367 return false;
6370 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6373 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6374 slp_perm = true;
6376 /* ??? The following is overly pessimistic (as well as the loop
6377 case above) in the case we can statically determine the excess
6378 elements loaded are within the bounds of a decl that is accessed.
6379 Likewise, for BB vectorization, using masked loads is a possibility. */
6380 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6383 "BB vectorization with gaps at the end of a load "
6384 "is not supported\n");
6385 return false;
6388 /* Invalidate assumptions made by dependence analysis when vectorization
6389 on the unrolled body effectively re-orders stmts. */
6390 if (!PURE_SLP_STMT (stmt_info)
6391 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6392 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6393 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6395 if (dump_enabled_p ())
6396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6397 "cannot perform implicit CSE when performing "
6398 "group loads with negative dependence distance\n");
6399 return false;
6402 /* Similarly when the stmt is a load that is both part of a SLP
6403 instance and a loop vectorized stmt via the same-dr mechanism
6404 we have to give up. */
6405 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6406 && (STMT_SLP_TYPE (stmt_info)
6407 != STMT_SLP_TYPE (vinfo_for_stmt
6408 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6410 if (dump_enabled_p ())
6411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6412 "conflicting SLP types for CSEd load\n");
6413 return false;
6418 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6420 gimple *def_stmt;
6421 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6422 &gather_off, &gather_scale);
6423 gcc_assert (gather_decl);
6424 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6425 &gather_off_vectype))
6427 if (dump_enabled_p ())
6428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6429 "gather index use not simple.\n");
6430 return false;
6433 else if (STMT_VINFO_STRIDED_P (stmt_info))
6435 if (grouped_load
6436 && slp
6437 && (group_size > nunits
6438 || nunits % group_size != 0))
6440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6441 "unhandled strided group load\n");
6442 return false;
6445 else
6447 negative = tree_int_cst_compare (nested_in_vect_loop
6448 ? STMT_VINFO_DR_STEP (stmt_info)
6449 : DR_STEP (dr),
6450 size_zero_node) < 0;
6451 if (negative && ncopies > 1)
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6455 "multiple types with negative step.\n");
6456 return false;
6459 if (negative)
6461 if (grouped_load)
6463 if (dump_enabled_p ())
6464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6465 "negative step for group load not supported"
6466 "\n");
6467 return false;
6469 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6470 if (alignment_support_scheme != dr_aligned
6471 && alignment_support_scheme != dr_unaligned_supported)
6473 if (dump_enabled_p ())
6474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6475 "negative step but alignment required.\n");
6476 return false;
6478 if (!perm_mask_for_reverse (vectype))
6480 if (dump_enabled_p ())
6481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6482 "negative step and reversing not supported."
6483 "\n");
6484 return false;
6489 if (!vec_stmt) /* transformation not required. */
6491 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6492 /* The SLP costs are calculated during SLP analysis. */
6493 if (!PURE_SLP_STMT (stmt_info))
6494 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6495 NULL, NULL, NULL);
6496 return true;
6499 if (dump_enabled_p ())
6500 dump_printf_loc (MSG_NOTE, vect_location,
6501 "transform load. ncopies = %d\n", ncopies);
6503 /** Transform. **/
6505 ensure_base_align (stmt_info, dr);
6507 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6509 tree vec_oprnd0 = NULL_TREE, op;
6510 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6511 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6512 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6513 edge pe = loop_preheader_edge (loop);
6514 gimple_seq seq;
6515 basic_block new_bb;
6516 enum { NARROW, NONE, WIDEN } modifier;
6517 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6519 if (nunits == gather_off_nunits)
6520 modifier = NONE;
6521 else if (nunits == gather_off_nunits / 2)
6523 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6524 modifier = WIDEN;
6526 for (i = 0; i < gather_off_nunits; ++i)
6527 sel[i] = i | nunits;
6529 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6531 else if (nunits == gather_off_nunits * 2)
6533 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6534 modifier = NARROW;
6536 for (i = 0; i < nunits; ++i)
6537 sel[i] = i < gather_off_nunits
6538 ? i : i + nunits - gather_off_nunits;
6540 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6541 ncopies *= 2;
6543 else
6544 gcc_unreachable ();
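/* When the offset vector and the result vector have different lengths,
   PERM_MASK is used below: in the WIDEN case odd copies shuffle the
   second half of the wider offset vector into place, and in the NARROW
   case two gather results are merged into one result vector (hence
   ncopies was doubled).  */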
6546 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6547 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6548 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6549 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6550 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6551 scaletype = TREE_VALUE (arglist);
6552 gcc_checking_assert (types_compatible_p (srctype, rettype));
6554 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6556 ptr = fold_convert (ptrtype, gather_base);
6557 if (!is_gimple_min_invariant (ptr))
6559 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6560 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6561 gcc_assert (!new_bb);
6564 /* Currently we support only unconditional gather loads,
6565 so mask should be all ones. */
6566 if (TREE_CODE (masktype) == INTEGER_TYPE)
6567 mask = build_int_cst (masktype, -1);
6568 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6570 mask = build_int_cst (TREE_TYPE (masktype), -1);
6571 mask = build_vector_from_val (masktype, mask);
6572 mask = vect_init_vector (stmt, mask, masktype, NULL);
6574 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6576 REAL_VALUE_TYPE r;
6577 long tmp[6];
6578 for (j = 0; j < 6; ++j)
6579 tmp[j] = -1;
6580 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6581 mask = build_real (TREE_TYPE (masktype), r);
6582 mask = build_vector_from_val (masktype, mask);
6583 mask = vect_init_vector (stmt, mask, masktype, NULL);
6585 else
6586 gcc_unreachable ();
6588 scale = build_int_cst (scaletype, gather_scale);
6590 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6591 merge = build_int_cst (TREE_TYPE (rettype), 0);
6592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6594 REAL_VALUE_TYPE r;
6595 long tmp[6];
6596 for (j = 0; j < 6; ++j)
6597 tmp[j] = 0;
6598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6599 merge = build_real (TREE_TYPE (rettype), r);
6601 else
6602 gcc_unreachable ();
6603 merge = build_vector_from_val (rettype, merge);
6604 merge = vect_init_vector (stmt, merge, rettype, NULL);
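/* Emit one gather call per copy, view-converting the offset vector to
   IDXTYPE and the gather result to VECTYPE whenever the types are not
   trivially compatible.  */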
6606 prev_stmt_info = NULL;
6607 for (j = 0; j < ncopies; ++j)
6609 if (modifier == WIDEN && (j & 1))
6610 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6611 perm_mask, stmt, gsi);
6612 else if (j == 0)
6613 op = vec_oprnd0
6614 = vect_get_vec_def_for_operand (gather_off, stmt);
6615 else
6616 op = vec_oprnd0
6617 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6619 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6621 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6622 == TYPE_VECTOR_SUBPARTS (idxtype));
6623 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6624 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6625 new_stmt
6626 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6627 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6628 op = var;
6631 new_stmt
6632 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6634 if (!useless_type_conversion_p (vectype, rettype))
6636 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6637 == TYPE_VECTOR_SUBPARTS (rettype));
6638 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6639 gimple_call_set_lhs (new_stmt, op);
6640 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6641 var = make_ssa_name (vec_dest);
6642 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6643 new_stmt
6644 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6646 else
6648 var = make_ssa_name (vec_dest, new_stmt);
6649 gimple_call_set_lhs (new_stmt, var);
6652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6654 if (modifier == NARROW)
6656 if ((j & 1) == 0)
6658 prev_res = var;
6659 continue;
6661 var = permute_vec_elements (prev_res, var,
6662 perm_mask, stmt, gsi);
6663 new_stmt = SSA_NAME_DEF_STMT (var);
6666 if (prev_stmt_info == NULL)
6667 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6668 else
6669 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6670 prev_stmt_info = vinfo_for_stmt (new_stmt);
6672 return true;
6674 else if (STMT_VINFO_STRIDED_P (stmt_info))
6676 gimple_stmt_iterator incr_gsi;
6677 bool insert_after;
6678 gimple *incr;
6679 tree offvar;
6680 tree ivstep;
6681 tree running_off;
6682 vec<constructor_elt, va_gc> *v = NULL;
6683 gimple_seq stmts = NULL;
6684 tree stride_base, stride_step, alias_off;
6686 gcc_assert (!nested_in_vect_loop);
6688 if (slp && grouped_load)
6689 first_dr = STMT_VINFO_DATA_REF
6690 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6691 else
6692 first_dr = dr;
6694 stride_base
6695 = fold_build_pointer_plus
6696 (DR_BASE_ADDRESS (first_dr),
6697 size_binop (PLUS_EXPR,
6698 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6699 convert_to_ptrofftype (DR_INIT (first_dr))));
6700 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6702 /* For a load with loop-invariant (but other than power-of-2)
6703 stride (i.e. not a grouped access) like so:
6705 for (i = 0; i < n; i += stride)
6706 ... = array[i];
6708 we generate a new induction variable and new accesses to
6709 form a new vector (or vectors, depending on ncopies):
6711 for (j = 0; ; j += VF*stride)
6712 tmp1 = array[j];
6713 tmp2 = array[j + stride];
6715 vectemp = {tmp1, tmp2, ...}
6718 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6719 build_int_cst (TREE_TYPE (stride_step), vf));
6721 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6723 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6724 loop, &incr_gsi, insert_after,
6725 &offvar, NULL);
6726 incr = gsi_stmt (incr_gsi);
6727 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6729 stride_step = force_gimple_operand (unshare_expr (stride_step),
6730 &stmts, true, NULL_TREE);
6731 if (stmts)
6732 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6734 prev_stmt_info = NULL;
6735 running_off = offvar;
6736 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6737 int nloads = nunits;
6738 tree ltype = TREE_TYPE (vectype);
6739 auto_vec<tree> dr_chain;
6740 if (slp)
6742 nloads = nunits / group_size;
6743 if (group_size < nunits)
6744 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6745 else
6746 ltype = vectype;
6747 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6748 /* For SLP permutation support we need to load the whole group,
6749 not only the number of vector stmts the permutation result
6750 fits in. */
6751 if (slp_perm)
6753 ncopies = (group_size * vf + nunits - 1) / nunits;
6754 dr_chain.create (ncopies);
6756 else
6757 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6759 for (j = 0; j < ncopies; j++)
6761 tree vec_inv;
6763 if (nloads > 1)
6765 vec_alloc (v, nloads);
6766 for (i = 0; i < nloads; i++)
6768 tree newref, newoff;
6769 gimple *incr;
6770 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6772 newref = force_gimple_operand_gsi (gsi, newref, true,
6773 NULL_TREE, true,
6774 GSI_SAME_STMT);
6775 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6776 newoff = copy_ssa_name (running_off);
6777 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6778 running_off, stride_step);
6779 vect_finish_stmt_generation (stmt, incr, gsi);
6781 running_off = newoff;
6784 vec_inv = build_constructor (vectype, v);
6785 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6786 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6788 else
6790 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6791 build2 (MEM_REF, ltype,
6792 running_off, alias_off));
6793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6795 tree newoff = copy_ssa_name (running_off);
6796 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6797 running_off, stride_step);
6798 vect_finish_stmt_generation (stmt, incr, gsi);
6800 running_off = newoff;
6803 if (slp)
6805 if (slp_perm)
6806 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6807 else
6808 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6810 else
6812 if (j == 0)
6813 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6814 else
6815 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6816 prev_stmt_info = vinfo_for_stmt (new_stmt);
6819 if (slp_perm)
6820 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6821 slp_node_instance, false);
6822 return true;
6825 if (grouped_load)
6827 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6828 /* For SLP vectorization we directly vectorize a subchain
6829 without permutation. */
6830 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6831 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6832 /* For BB vectorization always use the first stmt to base
6833 the data ref pointer on. */
6834 if (bb_vinfo)
6835 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6837 /* Check if the chain of loads is already vectorized. */
6838 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6839 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6840 ??? But we can only do so if there is exactly one
6841 as we have no way to get at the rest. Leave the CSE
6842 opportunity alone.
6843 ??? With the group load eventually participating
6844 in multiple different permutations (having multiple
6845 slp nodes which refer to the same group) the CSE
6846 is even wrong code. See PR56270. */
6847 && !slp)
6849 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6850 return true;
6852 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6853 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6854 group_gap_adj = 0;
6856 /* VEC_NUM is the number of vect stmts to be created for this group. */
6857 if (slp)
6859 grouped_load = false;
6860 /* For SLP permutation support we need to load the whole group,
6861 not only the number of vector stmts the permutation result
6862 fits in. */
6863 if (slp_perm)
6864 vec_num = (group_size * vf + nunits - 1) / nunits;
6865 else
6866 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6867 group_gap_adj = vf * group_size - nunits * vec_num;
6869 else
6870 vec_num = group_size;
6872 else
6874 first_stmt = stmt;
6875 first_dr = dr;
6876 group_size = vec_num = 1;
6877 group_gap_adj = 0;
6880 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6881 gcc_assert (alignment_support_scheme);
6882 /* Targets with load-lane instructions must not require explicit
6883 realignment. */
6884 gcc_assert (!load_lanes_p
6885 || alignment_support_scheme == dr_aligned
6886 || alignment_support_scheme == dr_unaligned_supported);
6888 /* In case the vectorization factor (VF) is bigger than the number
6889 of elements that we can fit in a vectype (nunits), we have to generate
6890 more than one vector stmt - i.e - we need to "unroll" the
6891 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6892 from one copy of the vector stmt to the next, in the field
6893 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6894 stages to find the correct vector defs to be used when vectorizing
6895 stmts that use the defs of the current stmt. The example below
6896 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6897 need to create 4 vectorized stmts):
6899 before vectorization:
6900 RELATED_STMT VEC_STMT
6901 S1: x = memref - -
6902 S2: z = x + 1 - -
6904 step 1: vectorize stmt S1:
6905 We first create the vector stmt VS1_0, and, as usual, record a
6906 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6907 Next, we create the vector stmt VS1_1, and record a pointer to
6908 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6909 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6910 stmts and pointers:
6911 RELATED_STMT VEC_STMT
6912 VS1_0: vx0 = memref0 VS1_1 -
6913 VS1_1: vx1 = memref1 VS1_2 -
6914 VS1_2: vx2 = memref2 VS1_3 -
6915 VS1_3: vx3 = memref3 - -
6916 S1: x = load - VS1_0
6917 S2: z = x + 1 - -
6919 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6920 information we recorded in RELATED_STMT field is used to vectorize
6921 stmt S2. */
6923 /* In case of interleaving (non-unit grouped access):
6925 S1: x2 = &base + 2
6926 S2: x0 = &base
6927 S3: x1 = &base + 1
6928 S4: x3 = &base + 3
6930 Vectorized loads are created in the order of memory accesses
6931 starting from the access of the first stmt of the chain:
6933 VS1: vx0 = &base
6934 VS2: vx1 = &base + vec_size*1
6935 VS3: vx3 = &base + vec_size*2
6936 VS4: vx4 = &base + vec_size*3
6938 Then permutation statements are generated:
6940 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6941 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6944 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6945 (the order of the data-refs in the output of vect_permute_load_chain
6946 corresponds to the order of scalar stmts in the interleaving chain - see
6947 the documentation of vect_permute_load_chain()).
6948 The generation of permutation stmts and recording them in
6949 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6951 In case of both multiple types and interleaving, the vector loads and
6952 permutation stmts above are created for every copy. The result vector
6953 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6954 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6956 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6957 on a target that supports unaligned accesses (dr_unaligned_supported)
6958 we generate the following code:
6959 p = initial_addr;
6960 indx = 0;
6961 loop {
6962 p = p + indx * vectype_size;
6963 vec_dest = *(p);
6964 indx = indx + 1;
6967 Otherwise, the data reference is potentially unaligned on a target that
6968 does not support unaligned accesses (dr_explicit_realign_optimized) -
6969 then generate the following code, in which the data in each iteration is
6970 obtained by two vector loads, one from the previous iteration, and one
6971 from the current iteration:
6972 p1 = initial_addr;
6973 msq_init = *(floor(p1))
6974 p2 = initial_addr + VS - 1;
6975 realignment_token = call target_builtin;
6976 indx = 0;
6977 loop {
6978 p2 = p2 + indx * vectype_size
6979 lsq = *(floor(p2))
6980 vec_dest = realign_load (msq, lsq, realignment_token)
6981 indx = indx + 1;
6982 msq = lsq;
6983 } */
6985 /* If the misalignment remains the same throughout the execution of the
6986 loop, we can create the init_addr and permutation mask at the loop
6987 preheader. Otherwise, it needs to be created inside the loop.
6988 This can only occur when vectorizing memory accesses in the inner-loop
6989 nested within an outer-loop that is being vectorized. */
6991 if (nested_in_vect_loop
6992 && (TREE_INT_CST_LOW (DR_STEP (dr))
6993 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6995 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6996 compute_in_loop = true;
6999 if ((alignment_support_scheme == dr_explicit_realign_optimized
7000 || alignment_support_scheme == dr_explicit_realign)
7001 && !compute_in_loop)
7003 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7004 alignment_support_scheme, NULL_TREE,
7005 &at_loop);
7006 if (alignment_support_scheme == dr_explicit_realign_optimized)
7008 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7009 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7010 size_one_node);
7013 else
7014 at_loop = loop;
7016 if (negative)
7017 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7019 if (load_lanes_p)
7020 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7021 else
7022 aggr_type = vectype;
7024 prev_stmt_info = NULL;
7025 for (j = 0; j < ncopies; j++)
7027 /* 1. Create the vector or array pointer update chain. */
7028 if (j == 0)
7030 bool simd_lane_access_p
7031 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7032 if (simd_lane_access_p
7033 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7034 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7035 && integer_zerop (DR_OFFSET (first_dr))
7036 && integer_zerop (DR_INIT (first_dr))
7037 && alias_sets_conflict_p (get_alias_set (aggr_type),
7038 get_alias_set (DR_REF (first_dr)))
7039 && (alignment_support_scheme == dr_aligned
7040 || alignment_support_scheme == dr_unaligned_supported))
7042 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7043 dataref_offset = build_int_cst (reference_alias_ptr_type
7044 (DR_REF (first_dr)), 0);
7045 inv_p = false;
7047 else if (first_stmt_for_drptr
7048 && first_stmt != first_stmt_for_drptr)
7050 dataref_ptr
7051 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7052 at_loop, offset, &dummy, gsi,
7053 &ptr_incr, simd_lane_access_p,
7054 &inv_p, byte_offset);
7055 /* Adjust the pointer by the difference to first_stmt. */
7056 data_reference_p ptrdr
7057 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7058 tree diff = fold_convert (sizetype,
7059 size_binop (MINUS_EXPR,
7060 DR_INIT (first_dr),
7061 DR_INIT (ptrdr)));
7062 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7063 stmt, diff);
7065 else
7066 dataref_ptr
7067 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7068 offset, &dummy, gsi, &ptr_incr,
7069 simd_lane_access_p, &inv_p,
7070 byte_offset);
7072 else if (dataref_offset)
7073 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7074 TYPE_SIZE_UNIT (aggr_type));
7075 else
7076 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7077 TYPE_SIZE_UNIT (aggr_type));
7079 if (grouped_load || slp_perm)
7080 dr_chain.create (vec_num);
7082 if (load_lanes_p)
7084 tree vec_array;
7086 vec_array = create_vector_array (vectype, vec_num);
7088 /* Emit:
7089 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7090 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7091 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7092 gimple_call_set_lhs (new_stmt, vec_array);
7093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7095 /* Extract each vector into an SSA_NAME. */
7096 for (i = 0; i < vec_num; i++)
7098 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7099 vec_array, i);
7100 dr_chain.quick_push (new_temp);
7103 /* Record the mapping between SSA_NAMEs and statements. */
7104 vect_record_grouped_load_vectors (stmt, dr_chain);
7106 else
7108 for (i = 0; i < vec_num; i++)
7110 if (i > 0)
7111 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7112 stmt, NULL_TREE);
7114 /* 2. Create the vector-load in the loop. */
7115 switch (alignment_support_scheme)
7117 case dr_aligned:
7118 case dr_unaligned_supported:
7120 unsigned int align, misalign;
7122 data_ref
7123 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7124 dataref_offset
7125 ? dataref_offset
7126 : build_int_cst (reference_alias_ptr_type
7127 (DR_REF (first_dr)), 0));
7128 align = TYPE_ALIGN_UNIT (vectype);
7129 if (alignment_support_scheme == dr_aligned)
7131 gcc_assert (aligned_access_p (first_dr));
7132 misalign = 0;
7134 else if (DR_MISALIGNMENT (first_dr) == -1)
7136 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7137 align = TYPE_ALIGN_UNIT (elem_type);
7138 else
7139 align = (get_object_alignment (DR_REF (first_dr))
7140 / BITS_PER_UNIT);
7141 misalign = 0;
7142 TREE_TYPE (data_ref)
7143 = build_aligned_type (TREE_TYPE (data_ref),
7144 align * BITS_PER_UNIT);
7146 else
7148 TREE_TYPE (data_ref)
7149 = build_aligned_type (TREE_TYPE (data_ref),
7150 TYPE_ALIGN (elem_type));
7151 misalign = DR_MISALIGNMENT (first_dr);
7153 if (dataref_offset == NULL_TREE
7154 && TREE_CODE (dataref_ptr) == SSA_NAME)
7155 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7156 align, misalign);
7157 break;
7159 case dr_explicit_realign:
7161 tree ptr, bump;
7163 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7165 if (compute_in_loop)
7166 msq = vect_setup_realignment (first_stmt, gsi,
7167 &realignment_token,
7168 dr_explicit_realign,
7169 dataref_ptr, NULL);
7171 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7172 ptr = copy_ssa_name (dataref_ptr);
7173 else
7174 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7175 new_stmt = gimple_build_assign
7176 (ptr, BIT_AND_EXPR, dataref_ptr,
7177 build_int_cst
7178 (TREE_TYPE (dataref_ptr),
7179 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7181 data_ref
7182 = build2 (MEM_REF, vectype, ptr,
7183 build_int_cst (reference_alias_ptr_type
7184 (DR_REF (first_dr)), 0));
7185 vec_dest = vect_create_destination_var (scalar_dest,
7186 vectype);
7187 new_stmt = gimple_build_assign (vec_dest, data_ref);
7188 new_temp = make_ssa_name (vec_dest, new_stmt);
7189 gimple_assign_set_lhs (new_stmt, new_temp);
7190 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7191 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7193 msq = new_temp;
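/* Build the address for the second realignment load: advance the
   pointer by one vector size minus one byte and align it down, so the
   load covers the last byte of the original unaligned access.  */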
7195 bump = size_binop (MULT_EXPR, vs,
7196 TYPE_SIZE_UNIT (elem_type));
7197 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7198 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7199 new_stmt = gimple_build_assign
7200 (NULL_TREE, BIT_AND_EXPR, ptr,
7201 build_int_cst
7202 (TREE_TYPE (ptr),
7203 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7204 ptr = copy_ssa_name (ptr, new_stmt);
7205 gimple_assign_set_lhs (new_stmt, ptr);
7206 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7207 data_ref
7208 = build2 (MEM_REF, vectype, ptr,
7209 build_int_cst (reference_alias_ptr_type
7210 (DR_REF (first_dr)), 0));
7211 break;
7213 case dr_explicit_realign_optimized:
7214 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7215 new_temp = copy_ssa_name (dataref_ptr);
7216 else
7217 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7218 new_stmt = gimple_build_assign
7219 (new_temp, BIT_AND_EXPR, dataref_ptr,
7220 build_int_cst
7221 (TREE_TYPE (dataref_ptr),
7222 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7223 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7224 data_ref
7225 = build2 (MEM_REF, vectype, new_temp,
7226 build_int_cst (reference_alias_ptr_type
7227 (DR_REF (first_dr)), 0));
7228 break;
7229 default:
7230 gcc_unreachable ();
7232 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7233 new_stmt = gimple_build_assign (vec_dest, data_ref);
7234 new_temp = make_ssa_name (vec_dest, new_stmt);
7235 gimple_assign_set_lhs (new_stmt, new_temp);
7236 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7238 /* 3. Handle explicit realignment if necessary/supported.
7239 Create in loop:
7240 vec_dest = realign_load (msq, lsq, realignment_token) */
7241 if (alignment_support_scheme == dr_explicit_realign_optimized
7242 || alignment_support_scheme == dr_explicit_realign)
7244 lsq = gimple_assign_lhs (new_stmt);
7245 if (!realignment_token)
7246 realignment_token = dataref_ptr;
7247 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7248 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7249 msq, lsq, realignment_token);
7250 new_temp = make_ssa_name (vec_dest, new_stmt);
7251 gimple_assign_set_lhs (new_stmt, new_temp);
7252 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7254 if (alignment_support_scheme == dr_explicit_realign_optimized)
7256 gcc_assert (phi);
7257 if (i == vec_num - 1 && j == ncopies - 1)
7258 add_phi_arg (phi, lsq,
7259 loop_latch_edge (containing_loop),
7260 UNKNOWN_LOCATION);
7261 msq = lsq;
7265 /* 4. Handle invariant-load. */
7266 if (inv_p && !bb_vinfo)
7268 gcc_assert (!grouped_load);
7269 /* If we have versioned for aliasing or the loop doesn't
7270 have any data dependencies that would preclude this,
7271 then we are sure this is a loop invariant load and
7272 thus we can insert it on the preheader edge. */
7273 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7274 && !nested_in_vect_loop
7275 && hoist_defs_of_uses (stmt, loop))
7277 if (dump_enabled_p ())
7279 dump_printf_loc (MSG_NOTE, vect_location,
7280 "hoisting out of the vectorized "
7281 "loop: ");
7282 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7284 tree tem = copy_ssa_name (scalar_dest);
7285 gsi_insert_on_edge_immediate
7286 (loop_preheader_edge (loop),
7287 gimple_build_assign (tem,
7288 unshare_expr
7289 (gimple_assign_rhs1 (stmt))));
7290 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7291 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7292 set_vinfo_for_stmt (new_stmt,
7293 new_stmt_vec_info (new_stmt, vinfo));
7295 else
7297 gimple_stmt_iterator gsi2 = *gsi;
7298 gsi_next (&gsi2);
7299 new_temp = vect_init_vector (stmt, scalar_dest,
7300 vectype, &gsi2);
7301 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7305 if (negative)
7307 tree perm_mask = perm_mask_for_reverse (vectype);
7308 new_temp = permute_vec_elements (new_temp, new_temp,
7309 perm_mask, stmt, gsi);
7310 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7313 /* Collect vector loads and later create their permutation in
7314 vect_transform_grouped_load (). */
7315 if (grouped_load || slp_perm)
7316 dr_chain.quick_push (new_temp);
7318 /* Store vector loads in the corresponding SLP_NODE. */
7319 if (slp && !slp_perm)
7320 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7322 /* Bump the vector pointer to account for a gap or for excess
7323 elements loaded for a permuted SLP load. */
7324 if (group_gap_adj != 0)
7326 bool ovf;
7327 tree bump
7328 = wide_int_to_tree (sizetype,
7329 wi::smul (TYPE_SIZE_UNIT (elem_type),
7330 group_gap_adj, &ovf));
7331 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7332 stmt, bump);
7336 if (slp && !slp_perm)
7337 continue;
7339 if (slp_perm)
7341 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7342 slp_node_instance, false))
7344 dr_chain.release ();
7345 return false;
7348 else
7350 if (grouped_load)
7352 if (!load_lanes_p)
7353 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7354 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7356 else
7358 if (j == 0)
7359 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7360 else
7361 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7362 prev_stmt_info = vinfo_for_stmt (new_stmt);
7365 dr_chain.release ();
7368 return true;
7371 /* Function vect_is_simple_cond.
7373 Input:
7374 VINFO - the vectorization info of the loop or basic block being vectorized.
7375 COND - Condition that is checked for simple use.
7377 Output:
7378 *COMP_VECTYPE - the vector type for the comparison.
7380 Returns whether a COND can be vectorized. Checks whether
7381 condition operands are supportable using vect_is_simple_use. */
7383 static bool
7384 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7386 tree lhs, rhs;
7387 enum vect_def_type dt;
7388 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7390 /* Mask case. */
7391 if (TREE_CODE (cond) == SSA_NAME
7392 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7394 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7395 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7396 &dt, comp_vectype)
7397 || !*comp_vectype
7398 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7399 return false;
7400 return true;
7403 if (!COMPARISON_CLASS_P (cond))
7404 return false;
7406 lhs = TREE_OPERAND (cond, 0);
7407 rhs = TREE_OPERAND (cond, 1);
7409 if (TREE_CODE (lhs) == SSA_NAME)
7411 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7412 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7413 return false;
7415 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7416 && TREE_CODE (lhs) != FIXED_CST)
7417 return false;
7419 if (TREE_CODE (rhs) == SSA_NAME)
7421 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7422 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7423 return false;
7425 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7426 && TREE_CODE (rhs) != FIXED_CST)
7427 return false;
7429 if (vectype1 && vectype2
7430 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7431 return false;
7433 *comp_vectype = vectype1 ? vectype1 : vectype2;
7434 return true;
7437 /* vectorizable_condition.
7439 Check if STMT is a conditional modify expression that can be vectorized.
7440 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7441 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7442 at GSI.
7444 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7445 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7446 the else clause if it is 2).
7448 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7450 bool
7451 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7452 gimple **vec_stmt, tree reduc_def, int reduc_index,
7453 slp_tree slp_node)
7455 tree scalar_dest = NULL_TREE;
7456 tree vec_dest = NULL_TREE;
7457 tree cond_expr, then_clause, else_clause;
7458 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7459 tree comp_vectype = NULL_TREE;
7460 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7461 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7462 tree vec_compare;
7463 tree new_temp;
7464 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7465 enum vect_def_type dt, dts[4];
7466 int ncopies;
7467 enum tree_code code;
7468 stmt_vec_info prev_stmt_info = NULL;
7469 int i, j;
7470 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7471 vec<tree> vec_oprnds0 = vNULL;
7472 vec<tree> vec_oprnds1 = vNULL;
7473 vec<tree> vec_oprnds2 = vNULL;
7474 vec<tree> vec_oprnds3 = vNULL;
7475 tree vec_cmp_type;
7476 bool masked = false;
7478 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7479 return false;
7481 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7483 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7484 return false;
7486 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7487 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7488 && reduc_def))
7489 return false;
7491 /* FORNOW: not yet supported. */
7492 if (STMT_VINFO_LIVE_P (stmt_info))
7494 if (dump_enabled_p ())
7495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7496 "value used after loop.\n");
7497 return false;
7501 /* Is vectorizable conditional operation? */
7502 if (!is_gimple_assign (stmt))
7503 return false;
7505 code = gimple_assign_rhs_code (stmt);
7507 if (code != COND_EXPR)
7508 return false;
7510 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7511 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7512 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7514 if (slp_node)
7515 ncopies = 1;
7516 else
7517 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7519 gcc_assert (ncopies >= 1);
7520 if (reduc_index && ncopies > 1)
7521 return false; /* FORNOW */
7523 cond_expr = gimple_assign_rhs1 (stmt);
7524 then_clause = gimple_assign_rhs2 (stmt);
7525 else_clause = gimple_assign_rhs3 (stmt);
7527 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7528 || !comp_vectype)
7529 return false;
7531 gimple *def_stmt;
7532 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7533 &vectype1))
7534 return false;
7535 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7536 &vectype2))
7537 return false;
7539 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7540 return false;
7542 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7543 return false;
7545 masked = !COMPARISON_CLASS_P (cond_expr);
7546 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7548 if (vec_cmp_type == NULL_TREE)
7549 return false;
7551 if (!vec_stmt)
7553 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7554 return expand_vec_cond_expr_p (vectype, comp_vectype);
7557 /* Transform. */
7559 if (!slp_node)
7561 vec_oprnds0.create (1);
7562 vec_oprnds1.create (1);
7563 vec_oprnds2.create (1);
7564 vec_oprnds3.create (1);
7567 /* Handle def. */
7568 scalar_dest = gimple_assign_lhs (stmt);
7569 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7571 /* Handle cond expr. */
7572 for (j = 0; j < ncopies; j++)
7574 gassign *new_stmt = NULL;
7575 if (j == 0)
7577 if (slp_node)
7579 auto_vec<tree, 4> ops;
7580 auto_vec<vec<tree>, 4> vec_defs;
7582 if (masked)
7583 ops.safe_push (cond_expr);
7584 else
7586 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7587 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7589 ops.safe_push (then_clause);
7590 ops.safe_push (else_clause);
7591 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7592 vec_oprnds3 = vec_defs.pop ();
7593 vec_oprnds2 = vec_defs.pop ();
7594 if (!masked)
7595 vec_oprnds1 = vec_defs.pop ();
7596 vec_oprnds0 = vec_defs.pop ();
7598 ops.release ();
7599 vec_defs.release ();
7601 else
7603 gimple *gtemp;
7604 if (masked)
7606 vec_cond_lhs
7607 = vect_get_vec_def_for_operand (cond_expr, stmt,
7608 comp_vectype);
7609 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7610 &gtemp, &dts[0]);
7612 else
7614 vec_cond_lhs =
7615 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7616 stmt, comp_vectype);
7617 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7618 loop_vinfo, &gtemp, &dts[0]);
7620 vec_cond_rhs =
7621 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7622 stmt, comp_vectype);
7623 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7624 loop_vinfo, &gtemp, &dts[1]);
7626 if (reduc_index == 1)
7627 vec_then_clause = reduc_def;
7628 else
7630 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7631 stmt);
7632 vect_is_simple_use (then_clause, loop_vinfo,
7633 &gtemp, &dts[2]);
7635 if (reduc_index == 2)
7636 vec_else_clause = reduc_def;
7637 else
7639 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7640 stmt);
7641 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7645 else
7647 vec_cond_lhs
7648 = vect_get_vec_def_for_stmt_copy (dts[0],
7649 vec_oprnds0.pop ());
7650 if (!masked)
7651 vec_cond_rhs
7652 = vect_get_vec_def_for_stmt_copy (dts[1],
7653 vec_oprnds1.pop ());
7655 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7656 vec_oprnds2.pop ());
7657 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7658 vec_oprnds3.pop ());
7661 if (!slp_node)
7663 vec_oprnds0.quick_push (vec_cond_lhs);
7664 if (!masked)
7665 vec_oprnds1.quick_push (vec_cond_rhs);
7666 vec_oprnds2.quick_push (vec_then_clause);
7667 vec_oprnds3.quick_push (vec_else_clause);
7670 /* Arguments are ready. Create the new vector stmt. */
7671 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7673 vec_then_clause = vec_oprnds2[i];
7674 vec_else_clause = vec_oprnds3[i];
7676 if (masked)
7677 vec_compare = vec_cond_lhs;
7678 else
7680 vec_cond_rhs = vec_oprnds1[i];
7681 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7682 vec_cond_lhs, vec_cond_rhs);
7684 new_temp = make_ssa_name (vec_dest);
7685 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7686 vec_compare, vec_then_clause,
7687 vec_else_clause);
7688 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7689 if (slp_node)
7690 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7693 if (slp_node)
7694 continue;
7696 if (j == 0)
7697 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7698 else
7699 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7701 prev_stmt_info = vinfo_for_stmt (new_stmt);
7704 vec_oprnds0.release ();
7705 vec_oprnds1.release ();
7706 vec_oprnds2.release ();
7707 vec_oprnds3.release ();
7709 return true;
7712 /* vectorizable_comparison.
7714 Check if STMT is a comparison expression that can be vectorized.
7715 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7716 comparison, put it in VEC_STMT, and insert it at GSI.
7718 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
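/* Illustrative sketch: the scalar statement

     mask_1 = a_2 < b_3;

   where mask_1 has a boolean type, is replaced by something like

     vect_mask_1 = vect_a_2 < vect_b_3;

   with vect_mask_1 of a vector boolean (mask) type (names are
   illustrative).  */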
7720 static bool
7721 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7722 gimple **vec_stmt, tree reduc_def,
7723 slp_tree slp_node)
7725 tree lhs, rhs1, rhs2;
7726 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7727 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7728 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7729 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7730 tree new_temp;
7731 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7732 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7733 unsigned nunits;
7734 int ncopies;
7735 enum tree_code code;
7736 stmt_vec_info prev_stmt_info = NULL;
7737 int i, j;
7738 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7739 vec<tree> vec_oprnds0 = vNULL;
7740 vec<tree> vec_oprnds1 = vNULL;
7741 gimple *def_stmt;
7742 tree mask_type;
7743 tree mask;
7745 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7746 return false;
7748 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7749 return false;
7751 mask_type = vectype;
7752 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7754 if (slp_node)
7755 ncopies = 1;
7756 else
7757 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7759 gcc_assert (ncopies >= 1);
7760 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7761 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7762 && reduc_def))
7763 return false;
7765 if (STMT_VINFO_LIVE_P (stmt_info))
7767 if (dump_enabled_p ())
7768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7769 "value used after loop.\n");
7770 return false;
7773 if (!is_gimple_assign (stmt))
7774 return false;
7776 code = gimple_assign_rhs_code (stmt);
7778 if (TREE_CODE_CLASS (code) != tcc_comparison)
7779 return false;
7781 rhs1 = gimple_assign_rhs1 (stmt);
7782 rhs2 = gimple_assign_rhs2 (stmt);
7784 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7785 &dts[0], &vectype1))
7786 return false;
7788 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7789 &dts[1], &vectype2))
7790 return false;
7792 if (vectype1 && vectype2
7793 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7794 return false;
7796 vectype = vectype1 ? vectype1 : vectype2;
7798 /* Invariant comparison. */
7799 if (!vectype)
7801 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7802 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7803 return false;
7805 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7806 return false;
7808 if (!vec_stmt)
7810 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7811 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7812 return expand_vec_cmp_expr_p (vectype, mask_type);
7815 /* Transform. */
7816 if (!slp_node)
7818 vec_oprnds0.create (1);
7819 vec_oprnds1.create (1);
7822 /* Handle def. */
7823 lhs = gimple_assign_lhs (stmt);
7824 mask = vect_create_destination_var (lhs, mask_type);
7826 /* Handle cmp expr. */
7827 for (j = 0; j < ncopies; j++)
7829 gassign *new_stmt = NULL;
7830 if (j == 0)
7832 if (slp_node)
7834 auto_vec<tree, 2> ops;
7835 auto_vec<vec<tree>, 2> vec_defs;
7837 ops.safe_push (rhs1);
7838 ops.safe_push (rhs2);
7839 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7840 vec_oprnds1 = vec_defs.pop ();
7841 vec_oprnds0 = vec_defs.pop ();
7843 else
7845 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7846 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7849 else
7851 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7852 vec_oprnds0.pop ());
7853 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7854 vec_oprnds1.pop ());
7857 if (!slp_node)
7859 vec_oprnds0.quick_push (vec_rhs1);
7860 vec_oprnds1.quick_push (vec_rhs2);
7863 /* Arguments are ready. Create the new vector stmt. */
7864 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7866 vec_rhs2 = vec_oprnds1[i];
7868 new_temp = make_ssa_name (mask);
7869 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7871 if (slp_node)
7872 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7875 if (slp_node)
7876 continue;
7878 if (j == 0)
7879 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7880 else
7881 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7883 prev_stmt_info = vinfo_for_stmt (new_stmt);
7886 vec_oprnds0.release ();
7887 vec_oprnds1.release ();
7889 return true;
7892 /* Make sure the statement is vectorizable. */
7894 bool
7895 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7898 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7899 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7900 bool ok;
7901 tree scalar_type, vectype;
7902 gimple *pattern_stmt;
7903 gimple_seq pattern_def_seq;
7905 if (dump_enabled_p ())
7907 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7908 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7911 if (gimple_has_volatile_ops (stmt))
7913 if (dump_enabled_p ())
7914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7915 "not vectorized: stmt has volatile operands\n");
7917 return false;
7920 /* Skip stmts that do not need to be vectorized. In loops this is expected
7921 to include:
7922 - the COND_EXPR which is the loop exit condition
7923 - any LABEL_EXPRs in the loop
7924 - computations that are used only for array indexing or loop control.
7925 In basic blocks we only analyze statements that are a part of some SLP
7926 instance; therefore, all the statements are relevant.
7928 A pattern statement needs to be analyzed instead of the original statement
7929 if the original statement is not relevant. Otherwise, we analyze both
7930 statements. In basic blocks we are called from some SLP instance
7931 traversal, so don't analyze pattern stmts instead; the pattern stmts
7932 will already be part of the SLP instance. */
7934 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7935 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7936 && !STMT_VINFO_LIVE_P (stmt_info))
7938 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7939 && pattern_stmt
7940 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7941 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7943 /* Analyze PATTERN_STMT instead of the original stmt. */
7944 stmt = pattern_stmt;
7945 stmt_info = vinfo_for_stmt (pattern_stmt);
7946 if (dump_enabled_p ())
7948 dump_printf_loc (MSG_NOTE, vect_location,
7949 "==> examining pattern statement: ");
7950 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7953 else
7955 if (dump_enabled_p ())
7956 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7958 return true;
7961 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7962 && node == NULL
7963 && pattern_stmt
7964 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7965 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7967 /* Analyze PATTERN_STMT too. */
7968 if (dump_enabled_p ())
7970 dump_printf_loc (MSG_NOTE, vect_location,
7971 "==> examining pattern statement: ");
7972 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7975 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7976 return false;
7979 if (is_pattern_stmt_p (stmt_info)
7980 && node == NULL
7981 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7983 gimple_stmt_iterator si;
7985 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7987 gimple *pattern_def_stmt = gsi_stmt (si);
7988 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7989 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7991 /* Analyze def stmt of STMT if it's a pattern stmt. */
7992 if (dump_enabled_p ())
7994 dump_printf_loc (MSG_NOTE, vect_location,
7995 "==> examining pattern def statement: ");
7996 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7999 if (!vect_analyze_stmt (pattern_def_stmt,
8000 need_to_vectorize, node))
8001 return false;
8006 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8008 case vect_internal_def:
8009 break;
8011 case vect_reduction_def:
8012 case vect_nested_cycle:
8013 gcc_assert (!bb_vinfo
8014 && (relevance == vect_used_in_outer
8015 || relevance == vect_used_in_outer_by_reduction
8016 || relevance == vect_used_by_reduction
8017 || relevance == vect_unused_in_scope));
8018 break;
8020 case vect_induction_def:
8021 case vect_constant_def:
8022 case vect_external_def:
8023 case vect_unknown_def_type:
8024 default:
8025 gcc_unreachable ();
8028 if (bb_vinfo)
8030 gcc_assert (PURE_SLP_STMT (stmt_info));
8032 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8033 if (dump_enabled_p ())
8035 dump_printf_loc (MSG_NOTE, vect_location,
8036 "get vectype for scalar type: ");
8037 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8038 dump_printf (MSG_NOTE, "\n");
8041 vectype = get_vectype_for_scalar_type (scalar_type);
8042 if (!vectype)
8044 if (dump_enabled_p ())
8046 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8047 "not SLPed: unsupported data-type ");
8048 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8049 scalar_type);
8050 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8052 return false;
8055 if (dump_enabled_p ())
8057 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8058 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8059 dump_printf (MSG_NOTE, "\n");
8062 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8065 if (STMT_VINFO_RELEVANT_P (stmt_info))
8067 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8068 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8069 || (is_gimple_call (stmt)
8070 && gimple_call_lhs (stmt) == NULL_TREE));
8071 *need_to_vectorize = true;
8074 if (PURE_SLP_STMT (stmt_info) && !node)
8076 dump_printf_loc (MSG_NOTE, vect_location,
8077 "handled only by SLP analysis\n");
8078 return true;
8081 ok = true;
8082 if (!bb_vinfo
8083 && (STMT_VINFO_RELEVANT_P (stmt_info)
8084 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8085 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8086 || vectorizable_conversion (stmt, NULL, NULL, node)
8087 || vectorizable_shift (stmt, NULL, NULL, node)
8088 || vectorizable_operation (stmt, NULL, NULL, node)
8089 || vectorizable_assignment (stmt, NULL, NULL, node)
8090 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8091 || vectorizable_call (stmt, NULL, NULL, node)
8092 || vectorizable_store (stmt, NULL, NULL, node)
8093 || vectorizable_reduction (stmt, NULL, NULL, node)
8094 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8095 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8096 else
8098 if (bb_vinfo)
8099 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8100 || vectorizable_conversion (stmt, NULL, NULL, node)
8101 || vectorizable_shift (stmt, NULL, NULL, node)
8102 || vectorizable_operation (stmt, NULL, NULL, node)
8103 || vectorizable_assignment (stmt, NULL, NULL, node)
8104 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8105 || vectorizable_call (stmt, NULL, NULL, node)
8106 || vectorizable_store (stmt, NULL, NULL, node)
8107 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8108 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8111 if (!ok)
8113 if (dump_enabled_p ())
8115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8116 "not vectorized: relevant stmt not ");
8117 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8118 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8121 return false;
8124 if (bb_vinfo)
8125 return true;
8127 /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
8128 need extra handling, except for vectorizable reductions. */
8129 if (STMT_VINFO_LIVE_P (stmt_info)
8130 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8131 ok = vectorizable_live_operation (stmt, NULL, NULL);
8133 if (!ok)
8135 if (dump_enabled_p ())
8137 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8138 "not vectorized: live stmt not ");
8139 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8140 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8143 return false;
8146 return true;
8150 /* Function vect_transform_stmt.
8152 Create a vectorized stmt to replace STMT, and insert it at GSI. */
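/* The dispatch below is driven by STMT_VINFO_TYPE, which was set during
   analysis (e.g. load_vec_info_type leads to vectorizable_load); any
   generated vector stmt is recorded in STMT_VINFO_VEC_STMT.  */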
8154 bool
8155 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8156 bool *grouped_store, slp_tree slp_node,
8157 slp_instance slp_node_instance)
8159 bool is_store = false;
8160 gimple *vec_stmt = NULL;
8161 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8162 bool done;
8164 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8165 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8167 switch (STMT_VINFO_TYPE (stmt_info))
8169 case type_demotion_vec_info_type:
8170 case type_promotion_vec_info_type:
8171 case type_conversion_vec_info_type:
8172 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8173 gcc_assert (done);
8174 break;
8176 case induc_vec_info_type:
8177 gcc_assert (!slp_node);
8178 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8179 gcc_assert (done);
8180 break;
8182 case shift_vec_info_type:
8183 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8184 gcc_assert (done);
8185 break;
8187 case op_vec_info_type:
8188 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8189 gcc_assert (done);
8190 break;
8192 case assignment_vec_info_type:
8193 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8194 gcc_assert (done);
8195 break;
8197 case load_vec_info_type:
8198 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8199 slp_node_instance);
8200 gcc_assert (done);
8201 break;
8203 case store_vec_info_type:
8204 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8205 gcc_assert (done);
8206 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8208 /* In case of interleaving, the whole chain is vectorized when the
8209 last store in the chain is reached. Store stmts before the last
8210 one are skipped, and their vec_stmt_info shouldn't be freed
8211 meanwhile. */
8212 *grouped_store = true;
8213 if (STMT_VINFO_VEC_STMT (stmt_info))
8214 is_store = true;
8216 else
8217 is_store = true;
8218 break;
8220 case condition_vec_info_type:
8221 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8222 gcc_assert (done);
8223 break;
8225 case comparison_vec_info_type:
8226 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8227 gcc_assert (done);
8228 break;
8230 case call_vec_info_type:
8231 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8232 stmt = gsi_stmt (*gsi);
8233 if (is_gimple_call (stmt)
8234 && gimple_call_internal_p (stmt)
8235 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8236 is_store = true;
8237 break;
8239 case call_simd_clone_vec_info_type:
8240 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8241 stmt = gsi_stmt (*gsi);
8242 break;
8244 case reduc_vec_info_type:
8245 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8246 gcc_assert (done);
8247 break;
8249 default:
8250 if (!STMT_VINFO_LIVE_P (stmt_info))
8252 if (dump_enabled_p ())
8253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8254 "stmt not supported.\n");
8255 gcc_unreachable ();
8259 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8260 This would break hybrid SLP vectorization. */
8261 if (slp_node)
8262 gcc_assert (!vec_stmt
8263 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8265 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8266 is being vectorized, but outside the immediately enclosing loop. */
8267 if (vec_stmt
8268 && STMT_VINFO_LOOP_VINFO (stmt_info)
8269 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8270 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8271 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8272 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8273 || STMT_VINFO_RELEVANT (stmt_info) ==
8274 vect_used_in_outer_by_reduction))
8276 struct loop *innerloop = LOOP_VINFO_LOOP (
8277 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8278 imm_use_iterator imm_iter;
8279 use_operand_p use_p;
8280 tree scalar_dest;
8281 gimple *exit_phi;
8283 if (dump_enabled_p ())
8284 dump_printf_loc (MSG_NOTE, vect_location,
8285 "Record the vdef for outer-loop vectorization.\n");
8287 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8288 (to be used when vectorizing outer-loop stmts that use the DEF of
8289 STMT). */
8290 if (gimple_code (stmt) == GIMPLE_PHI)
8291 scalar_dest = PHI_RESULT (stmt);
8292 else
8293 scalar_dest = gimple_assign_lhs (stmt);
8295 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8297 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8299 exit_phi = USE_STMT (use_p);
8300 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8305 /* Handle stmts whose DEF is used outside the loop-nest that is
8306 being vectorized. */
8307 if (STMT_VINFO_LIVE_P (stmt_info)
8308 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8310 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
8311 gcc_assert (done);
8314 if (vec_stmt)
8315 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8317 return is_store;
8321 /* Remove a group of stores (for SLP or interleaving), free their
8322 stmt_vec_info. */
8324 void
8325 vect_remove_stores (gimple *first_stmt)
8327 gimple *next = first_stmt;
8328 gimple *tmp;
8329 gimple_stmt_iterator next_si;
8331 while (next)
8333 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8335 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8336 if (is_pattern_stmt_p (stmt_info))
8337 next = STMT_VINFO_RELATED_STMT (stmt_info);
8338 /* Free the attached stmt_vec_info and remove the stmt. */
8339 next_si = gsi_for_stmt (next);
8340 unlink_stmt_vdef (next);
8341 gsi_remove (&next_si, true);
8342 release_defs (next);
8343 free_stmt_vec_info (next);
8344 next = tmp;
8349 /* Function new_stmt_vec_info.
8351 Create and initialize a new stmt_vec_info struct for STMT. */
8353 stmt_vec_info
8354 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8356 stmt_vec_info res;
8357 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8359 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8360 STMT_VINFO_STMT (res) = stmt;
8361 res->vinfo = vinfo;
8362 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8363 STMT_VINFO_LIVE_P (res) = false;
8364 STMT_VINFO_VECTYPE (res) = NULL;
8365 STMT_VINFO_VEC_STMT (res) = NULL;
8366 STMT_VINFO_VECTORIZABLE (res) = true;
8367 STMT_VINFO_IN_PATTERN_P (res) = false;
8368 STMT_VINFO_RELATED_STMT (res) = NULL;
8369 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8370 STMT_VINFO_DATA_REF (res) = NULL;
8371 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8373 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8374 STMT_VINFO_DR_OFFSET (res) = NULL;
8375 STMT_VINFO_DR_INIT (res) = NULL;
8376 STMT_VINFO_DR_STEP (res) = NULL;
8377 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8379 if (gimple_code (stmt) == GIMPLE_PHI
8380 && is_loop_header_bb_p (gimple_bb (stmt)))
8381 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8382 else
8383 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8385 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8386 STMT_SLP_TYPE (res) = loop_vect;
8387 STMT_VINFO_NUM_SLP_USES (res) = 0;
8389 GROUP_FIRST_ELEMENT (res) = NULL;
8390 GROUP_NEXT_ELEMENT (res) = NULL;
8391 GROUP_SIZE (res) = 0;
8392 GROUP_STORE_COUNT (res) = 0;
8393 GROUP_GAP (res) = 0;
8394 GROUP_SAME_DR_STMT (res) = NULL;
8396 return res;
8400 /* Create a vector for stmt_vec_info. */
8402 void
8403 init_stmt_vec_info_vec (void)
8405 gcc_assert (!stmt_vec_info_vec.exists ());
8406 stmt_vec_info_vec.create (50);
8410 /* Free the vector for stmt_vec_info. */
8412 void
8413 free_stmt_vec_info_vec (void)
8415 unsigned int i;
8416 stmt_vec_info info;
8417 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8418 if (info != NULL)
8419 free_stmt_vec_info (STMT_VINFO_STMT (info));
8420 gcc_assert (stmt_vec_info_vec.exists ());
8421 stmt_vec_info_vec.release ();
8425 /* Free stmt vectorization related info. */
8427 void
8428 free_stmt_vec_info (gimple *stmt)
8430 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8432 if (!stmt_info)
8433 return;
8435 /* Check if this statement has a related "pattern stmt"
8436 (introduced by the vectorizer during the pattern recognition
8437 pass). Free the pattern's stmt_vec_info and def stmt's stmt_vec_info
8438 too. */
8439 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8441 stmt_vec_info patt_info
8442 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8443 if (patt_info)
8445 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8446 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8447 gimple_set_bb (patt_stmt, NULL);
8448 tree lhs = gimple_get_lhs (patt_stmt);
8449 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8450 release_ssa_name (lhs);
8451 if (seq)
8453 gimple_stmt_iterator si;
8454 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8456 gimple *seq_stmt = gsi_stmt (si);
8457 gimple_set_bb (seq_stmt, NULL);
8458 lhs = gimple_get_lhs (seq_stmt);
8459 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8460 release_ssa_name (lhs);
8461 free_stmt_vec_info (seq_stmt);
8464 free_stmt_vec_info (patt_stmt);
8468 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8469 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8470 set_vinfo_for_stmt (stmt, NULL);
8471 free (stmt_info);
8475 /* Function get_vectype_for_scalar_type_and_size.
8477 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8478 by the target. */
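/* E.g. for SCALAR_TYPE "int" and SIZE 16 this would typically return a
   4-element integer vector type; with SIZE 0 the target's preferred SIMD
   mode for the element mode is used instead (illustrative, the actual
   result is target dependent).  */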
8480 static tree
8481 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8483 machine_mode inner_mode = TYPE_MODE (scalar_type);
8484 machine_mode simd_mode;
8485 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8486 int nunits;
8487 tree vectype;
8489 if (nbytes == 0)
8490 return NULL_TREE;
8492 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8493 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8494 return NULL_TREE;
8496 /* For vector types of elements whose mode precision doesn't
8497 match their type's precision we use an element type of mode
8498 precision. The vectorization routines will have to make sure
8499 they support the proper result truncation/extension.
8500 We also make sure to build vector types with INTEGER_TYPE
8501 component type only. */
8502 if (INTEGRAL_TYPE_P (scalar_type)
8503 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8504 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8505 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8506 TYPE_UNSIGNED (scalar_type));
8508 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8509 When the component mode passes the above test simply use a type
8510 corresponding to that mode. The theory is that any use that
8511 would cause problems with this will disable vectorization anyway. */
8512 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8513 && !INTEGRAL_TYPE_P (scalar_type))
8514 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8516 /* We can't build a vector type of elements with alignment bigger than
8517 their size. */
8518 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8519 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8520 TYPE_UNSIGNED (scalar_type));
8522 /* If we fell back to using the mode, fail if there was
8523 no scalar type for it. */
8524 if (scalar_type == NULL_TREE)
8525 return NULL_TREE;
8527 /* If no size was supplied use the mode the target prefers. Otherwise
8528 look up a vector mode of the specified size. */
8529 if (size == 0)
8530 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8531 else
8532 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8533 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8534 if (nunits <= 1)
8535 return NULL_TREE;
8537 vectype = build_vector_type (scalar_type, nunits);
8539 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8540 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8541 return NULL_TREE;
8543 return vectype;
8546 unsigned int current_vector_size;
8548 /* Function get_vectype_for_scalar_type.
8550 Returns the vector type corresponding to SCALAR_TYPE as supported
8551 by the target. */
8553 tree
8554 get_vectype_for_scalar_type (tree scalar_type)
8556 tree vectype;
8557 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8558 current_vector_size);
8559 if (vectype
8560 && current_vector_size == 0)
8561 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8562 return vectype;
8565 /* Function get_mask_type_for_scalar_type.
8567 Returns the mask type corresponding to a result of comparison
8568 of vectors of the specified SCALAR_TYPE as supported by the target. */
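/* E.g. for a 4-element vector of "int" this returns a 4-element boolean
   vector type; whether that is a vector of integers or a scalar mask mode
   is a target decision (illustrative).  */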
8570 tree
8571 get_mask_type_for_scalar_type (tree scalar_type)
8573 tree vectype = get_vectype_for_scalar_type (scalar_type);
8575 if (!vectype)
8576 return NULL;
8578 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8579 current_vector_size);
8582 /* Function get_same_sized_vectype
8584 Returns a vector type corresponding to SCALAR_TYPE of size
8585 VECTOR_TYPE if supported by the target. */
8587 tree
8588 get_same_sized_vectype (tree scalar_type, tree vector_type)
8590 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8591 return build_same_sized_truth_vector_type (vector_type);
8593 return get_vectype_for_scalar_type_and_size
8594 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8597 /* Function vect_is_simple_use.
8599 Input:
8600 VINFO - the vect info of the loop or basic block that is being vectorized.
8601 OPERAND - operand in the loop or bb.
8602 Output:
8603 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8604 DT - the type of definition
8606 Returns whether a stmt with OPERAND can be vectorized.
8607 For loops, supportable operands are constants, loop invariants, and operands
8608 that are defined by the current iteration of the loop. Unsupportable
8609 operands are those that are defined by a previous iteration of the loop (as
8610 is the case in reduction/induction computations).
8611 For basic blocks, supportable operands are constants and bb invariants.
8612 For now, operands defined outside the basic block are not supported. */
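/* For example, in the loop statement "a_5 = b_3 + c_7" an operand defined
   by another statement inside the vectorized region yields
   vect_internal_def, one defined before the region yields
   vect_external_def, and a constant yields vect_constant_def
   (illustrative).  */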
8614 bool
8615 vect_is_simple_use (tree operand, vec_info *vinfo,
8616 gimple **def_stmt, enum vect_def_type *dt)
8618 *def_stmt = NULL;
8619 *dt = vect_unknown_def_type;
8621 if (dump_enabled_p ())
8623 dump_printf_loc (MSG_NOTE, vect_location,
8624 "vect_is_simple_use: operand ");
8625 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8626 dump_printf (MSG_NOTE, "\n");
8629 if (CONSTANT_CLASS_P (operand))
8631 *dt = vect_constant_def;
8632 return true;
8635 if (is_gimple_min_invariant (operand))
8637 *dt = vect_external_def;
8638 return true;
8641 if (TREE_CODE (operand) != SSA_NAME)
8643 if (dump_enabled_p ())
8644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8645 "not ssa-name.\n");
8646 return false;
8649 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8651 *dt = vect_external_def;
8652 return true;
8655 *def_stmt = SSA_NAME_DEF_STMT (operand);
8656 if (dump_enabled_p ())
8658 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8662 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8663 *dt = vect_external_def;
8664 else
8666 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8667 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8670 if (dump_enabled_p ())
8672 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8673 switch (*dt)
8675 case vect_uninitialized_def:
8676 dump_printf (MSG_NOTE, "uninitialized\n");
8677 break;
8678 case vect_constant_def:
8679 dump_printf (MSG_NOTE, "constant\n");
8680 break;
8681 case vect_external_def:
8682 dump_printf (MSG_NOTE, "external\n");
8683 break;
8684 case vect_internal_def:
8685 dump_printf (MSG_NOTE, "internal\n");
8686 break;
8687 case vect_induction_def:
8688 dump_printf (MSG_NOTE, "induction\n");
8689 break;
8690 case vect_reduction_def:
8691 dump_printf (MSG_NOTE, "reduction\n");
8692 break;
8693 case vect_double_reduction_def:
8694 dump_printf (MSG_NOTE, "double reduction\n");
8695 break;
8696 case vect_nested_cycle:
8697 dump_printf (MSG_NOTE, "nested cycle\n");
8698 break;
8699 case vect_unknown_def_type:
8700 dump_printf (MSG_NOTE, "unknown\n");
8701 break;
8705 if (*dt == vect_unknown_def_type)
8707 if (dump_enabled_p ())
8708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8709 "Unsupported pattern.\n");
8710 return false;
8713 switch (gimple_code (*def_stmt))
8715 case GIMPLE_PHI:
8716 case GIMPLE_ASSIGN:
8717 case GIMPLE_CALL:
8718 break;
8719 default:
8720 if (dump_enabled_p ())
8721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8722 "unsupported defining stmt:\n");
8723 return false;
8726 return true;
8729 /* Function vect_is_simple_use.
8731 Same as vect_is_simple_use but also determines the vector operand
8732 type of OPERAND and stores it to *VECTYPE. If the definition of
8733 OPERAND is vect_uninitialized_def, vect_constant_def or
8734 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8735 is responsible for computing the best suited vector type for the
8736 scalar operand. */
8738 bool
8739 vect_is_simple_use (tree operand, vec_info *vinfo,
8740 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8742 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8743 return false;
8745 /* Now get a vector type if the def is internal, otherwise supply
8746 NULL_TREE and leave it up to the caller to figure out a proper
8747 type for the use stmt. */
8748 if (*dt == vect_internal_def
8749 || *dt == vect_induction_def
8750 || *dt == vect_reduction_def
8751 || *dt == vect_double_reduction_def
8752 || *dt == vect_nested_cycle)
8754 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8756 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8757 && !STMT_VINFO_RELEVANT (stmt_info)
8758 && !STMT_VINFO_LIVE_P (stmt_info))
8759 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8761 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8762 gcc_assert (*vectype != NULL_TREE);
8764 else if (*dt == vect_uninitialized_def
8765 || *dt == vect_constant_def
8766 || *dt == vect_external_def)
8767 *vectype = NULL_TREE;
8768 else
8769 gcc_unreachable ();
8771 return true;
8775 /* Function supportable_widening_operation
8777 Check whether an operation represented by the code CODE is a
8778 widening operation that is supported by the target platform in
8779 vector form (i.e., when operating on arguments of type VECTYPE_IN
8780 producing a result of type VECTYPE_OUT).
8782 Widening operations we currently support are NOP (CONVERT), FLOAT
8783 and WIDEN_MULT. This function checks if these operations are supported
8784 by the target platform either directly (via vector tree-codes), or via
8785 target builtins.
8787 Output:
8788 - CODE1 and CODE2 are codes of vector operations to be used when
8789 vectorizing the operation, if available.
8790 - MULTI_STEP_CVT determines the number of required intermediate steps in
8791 case of multi-step conversion (like char->short->int - in that case
8792 MULTI_STEP_CVT will be 1).
8793 - INTERM_TYPES contains the intermediate type required to perform the
8794 widening operation (short in the above example). */
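/* E.g. a char -> int conversion with 128-bit vectors may be done as
   V16QI -> V8HI -> V4SI using VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR at
   each step; MULTI_STEP_CVT would then be 1 and INTERM_TYPES would hold
   the short vector type (illustrative, target dependent).  */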
8796 bool
8797 supportable_widening_operation (enum tree_code code, gimple *stmt,
8798 tree vectype_out, tree vectype_in,
8799 enum tree_code *code1, enum tree_code *code2,
8800 int *multi_step_cvt,
8801 vec<tree> *interm_types)
8803 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8804 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8805 struct loop *vect_loop = NULL;
8806 machine_mode vec_mode;
8807 enum insn_code icode1, icode2;
8808 optab optab1, optab2;
8809 tree vectype = vectype_in;
8810 tree wide_vectype = vectype_out;
8811 enum tree_code c1, c2;
8812 int i;
8813 tree prev_type, intermediate_type;
8814 machine_mode intermediate_mode, prev_mode;
8815 optab optab3, optab4;
8817 *multi_step_cvt = 0;
8818 if (loop_info)
8819 vect_loop = LOOP_VINFO_LOOP (loop_info);
8821 switch (code)
8823 case WIDEN_MULT_EXPR:
8824 /* The result of a vectorized widening operation usually requires
8825 two vectors (because the widened results do not fit into one vector).
8826 The generated vector results would normally be expected to be
8827 generated in the same order as in the original scalar computation,
8828 i.e. if 8 results are generated in each vector iteration, they are
8829 to be organized as follows:
8830 vect1: [res1,res2,res3,res4],
8831 vect2: [res5,res6,res7,res8].
8833 However, in the special case that the result of the widening
8834 operation is used in a reduction computation only, the order doesn't
8835 matter (because when vectorizing a reduction we change the order of
8836 the computation). Some targets can take advantage of this and
8837 generate more efficient code. For example, targets like Altivec,
8838 that support widen_mult using a sequence of {mult_even,mult_odd}
8839 generate the following vectors:
8840 vect1: [res1,res3,res5,res7],
8841 vect2: [res2,res4,res6,res8].
8843 When vectorizing outer-loops, we execute the inner-loop sequentially
8844 (each vectorized inner-loop iteration contributes to VF outer-loop
8845 iterations in parallel). We therefore don't allow changing the
8846 order of the computation in the inner-loop during outer-loop
8847 vectorization. */
8848 /* TODO: Another case in which order doesn't *really* matter is when we
8849 widen and then contract again, e.g. (short)((int)x * y >> 8).
8850 Normally, pack_trunc performs an even/odd permute, whereas the
8851 repack from an even/odd expansion would be an interleave, which
8852 would be significantly simpler for e.g. AVX2. */
8853 /* In any case, in order to avoid duplicating the code below, recurse
8854 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8855 are properly set up for the caller. If we fail, we'll continue with
8856 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8857 if (vect_loop
8858 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8859 && !nested_in_vect_loop_p (vect_loop, stmt)
8860 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8861 stmt, vectype_out, vectype_in,
8862 code1, code2, multi_step_cvt,
8863 interm_types))
8865 /* Elements in a vector with the vect_used_by_reduction property cannot
8866 be reordered if the use chain with this property does not have the
8867 same operation. One such example is s += a * b, where elements
8868 in a and b cannot be reordered. Here we check if the vector defined
8869 by STMT is only directly used in the reduction statement. */
8870 tree lhs = gimple_assign_lhs (stmt);
8871 use_operand_p dummy;
8872 gimple *use_stmt;
8873 stmt_vec_info use_stmt_info = NULL;
8874 if (single_imm_use (lhs, &dummy, &use_stmt)
8875 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8876 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8877 return true;
8879 c1 = VEC_WIDEN_MULT_LO_EXPR;
8880 c2 = VEC_WIDEN_MULT_HI_EXPR;
8881 break;
8883 case DOT_PROD_EXPR:
8884 c1 = DOT_PROD_EXPR;
8885 c2 = DOT_PROD_EXPR;
8886 break;
8888 case SAD_EXPR:
8889 c1 = SAD_EXPR;
8890 c2 = SAD_EXPR;
8891 break;
8893 case VEC_WIDEN_MULT_EVEN_EXPR:
8894 /* Support the recursion induced just above. */
8895 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8896 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8897 break;
8899 case WIDEN_LSHIFT_EXPR:
8900 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8901 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8902 break;
8904 CASE_CONVERT:
8905 c1 = VEC_UNPACK_LO_EXPR;
8906 c2 = VEC_UNPACK_HI_EXPR;
8907 break;
8909 case FLOAT_EXPR:
8910 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8911 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8912 break;
8914 case FIX_TRUNC_EXPR:
8915 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8916 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8917 computing the operation. */
8918 return false;
8920 default:
8921 gcc_unreachable ();
8924 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8925 std::swap (c1, c2);
8927 if (code == FIX_TRUNC_EXPR)
8929 /* The signedness is determined from the output operand. */
8930 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8931 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8933 else
8935 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8936 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8939 if (!optab1 || !optab2)
8940 return false;
8942 vec_mode = TYPE_MODE (vectype);
8943 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8944 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8945 return false;
8947 *code1 = c1;
8948 *code2 = c2;
8950 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8951 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8952 /* For scalar masks we may have different boolean
8953 vector types having the same QImode. Thus we
8954 add an additional check on the number of elements. */
8955 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
8956 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
8957 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
8959 /* Check if it's a multi-step conversion that can be done using intermediate
8960 types. */
8962 prev_type = vectype;
8963 prev_mode = vec_mode;
8965 if (!CONVERT_EXPR_CODE_P (code))
8966 return false;
8968 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8969 intermediate steps in the promotion sequence. We try
8970 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8971 not. */
8972 interm_types->create (MAX_INTERM_CVT_STEPS);
8973 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8975 intermediate_mode = insn_data[icode1].operand[0].mode;
8976 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
8978 intermediate_type
8979 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
8980 current_vector_size);
8981 if (intermediate_mode != TYPE_MODE (intermediate_type))
8982 return false;
8984 else
8985 intermediate_type
8986 = lang_hooks.types.type_for_mode (intermediate_mode,
8987 TYPE_UNSIGNED (prev_type));
8989 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8990 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8992 if (!optab3 || !optab4
8993 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8994 || insn_data[icode1].operand[0].mode != intermediate_mode
8995 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8996 || insn_data[icode2].operand[0].mode != intermediate_mode
8997 || ((icode1 = optab_handler (optab3, intermediate_mode))
8998 == CODE_FOR_nothing)
8999 || ((icode2 = optab_handler (optab4, intermediate_mode))
9000 == CODE_FOR_nothing))
9001 break;
9003 interm_types->quick_push (intermediate_type);
9004 (*multi_step_cvt)++;
9006 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9007 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9008 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9009 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9010 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9012 prev_type = intermediate_type;
9013 prev_mode = intermediate_mode;
9016 interm_types->release ();
9017 return false;
9021 /* Function supportable_narrowing_operation
9023 Check whether an operation represented by the code CODE is a
9024 narrowing operation that is supported by the target platform in
9025 vector form (i.e., when operating on arguments of type VECTYPE_IN
9026 and producing a result of type VECTYPE_OUT).
9028 Narrowing operations we currently support are NOP (CONVERT) and
9029 FIX_TRUNC. This function checks if these operations are supported by
9030 the target platform directly via vector tree-codes.
9032 Output:
9033 - CODE1 is the code of a vector operation to be used when
9034 vectorizing the operation, if available.
9035 - MULTI_STEP_CVT determines the number of required intermediate steps in
9036 case of multi-step conversion (like int->short->char - in that case
9037 MULTI_STEP_CVT will be 1).
9038 - INTERM_TYPES contains the intermediate type required to perform the
9039 narrowing operation (short in the above example). */
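/* E.g. an int -> char conversion with 128-bit vectors may be done as
   V4SI -> V8HI -> V16QI using VEC_PACK_TRUNC_EXPR at each step;
   MULTI_STEP_CVT would then be 1 and INTERM_TYPES would hold the short
   vector type (illustrative, target dependent).  */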
9041 bool
9042 supportable_narrowing_operation (enum tree_code code,
9043 tree vectype_out, tree vectype_in,
9044 enum tree_code *code1, int *multi_step_cvt,
9045 vec<tree> *interm_types)
9047 machine_mode vec_mode;
9048 enum insn_code icode1;
9049 optab optab1, interm_optab;
9050 tree vectype = vectype_in;
9051 tree narrow_vectype = vectype_out;
9052 enum tree_code c1;
9053 tree intermediate_type, prev_type;
9054 machine_mode intermediate_mode, prev_mode;
9055 int i;
9056 bool uns;
9058 *multi_step_cvt = 0;
9059 switch (code)
9061 CASE_CONVERT:
9062 c1 = VEC_PACK_TRUNC_EXPR;
9063 break;
9065 case FIX_TRUNC_EXPR:
9066 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9067 break;
9069 case FLOAT_EXPR:
9070 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9071 tree code and optabs used for computing the operation. */
9072 return false;
9074 default:
9075 gcc_unreachable ();
9078 if (code == FIX_TRUNC_EXPR)
9079 /* The signedness is determined from the output operand. */
9080 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9081 else
9082 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9084 if (!optab1)
9085 return false;
9087 vec_mode = TYPE_MODE (vectype);
9088 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9089 return false;
9091 *code1 = c1;
9093 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9094 /* For scalar masks we may have different boolean
9095 vector types having the same QImode. Thus we
9096 add an additional check on the number of elements. */
9097 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9098 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9099 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9101 /* Check if it's a multi-step conversion that can be done using intermediate
9102 types. */
9103 prev_mode = vec_mode;
9104 prev_type = vectype;
9105 if (code == FIX_TRUNC_EXPR)
9106 uns = TYPE_UNSIGNED (vectype_out);
9107 else
9108 uns = TYPE_UNSIGNED (vectype);
9110 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9111 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9112 costly than signed. */
9113 if (code == FIX_TRUNC_EXPR && uns)
9115 enum insn_code icode2;
9117 intermediate_type
9118 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9119 interm_optab
9120 = optab_for_tree_code (c1, intermediate_type, optab_default);
9121 if (interm_optab != unknown_optab
9122 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9123 && insn_data[icode1].operand[0].mode
9124 == insn_data[icode2].operand[0].mode)
9126 uns = false;
9127 optab1 = interm_optab;
9128 icode1 = icode2;
9132 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9133 intermediate steps in the narrowing sequence. We try
9134 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9135 interm_types->create (MAX_INTERM_CVT_STEPS);
9136 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9138 intermediate_mode = insn_data[icode1].operand[0].mode;
9139 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9141 intermediate_type
9142 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9143 current_vector_size);
9144 if (intermediate_mode != TYPE_MODE (intermediate_type))
9145 return false;
9147 else
9148 intermediate_type
9149 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9150 interm_optab
9151 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9152 optab_default);
9153 if (!interm_optab
9154 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9155 || insn_data[icode1].operand[0].mode != intermediate_mode
9156 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9157 == CODE_FOR_nothing))
9158 break;
9160 interm_types->quick_push (intermediate_type);
9161 (*multi_step_cvt)++;
9163 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9164 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9165 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9166 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9168 prev_mode = intermediate_mode;
9169 prev_type = intermediate_type;
9170 optab1 = interm_optab;
9173 interm_types->release ();
9174 return false;