gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
57 tree
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
65 bool
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
71 struct loop* loop;
73 if (!loop_vinfo)
74 return false;
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
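  /* Note (added): with outer-loop vectorization LOOP is the outer loop, so
     the test below holds exactly for statements in the nested inner loop;
     if LOOP has no inner loop, loop->inner is NULL and the test is false.  */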
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
85 unsigned
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
90 if (body_cost_vec)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
95 misalign };
96 body_cost_vec->safe_push (si);
97 return (unsigned)
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
100 else
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple *new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple *new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
184 enum vect_relevant relevant, bool live_p)
186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
187 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
188 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
189 gimple *pattern_stmt;
191 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE, vect_location,
194 "mark relevant %d, live %d: ", relevant, live_p);
195 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
198 /* If this stmt is an original stmt in a pattern, we might need to mark its
199 related pattern stmt instead of the original stmt. However, such stmts
200 may have their own uses that are not in any pattern; in such cases the
201 stmt itself should be marked. */
202 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
204 /* This is the last stmt in a sequence that was detected as a
205 pattern that can potentially be vectorized. Don't mark the stmt
206 as relevant/live because it's not going to be vectorized.
207 Instead mark the pattern-stmt that replaces it. */
209 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
211 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE, vect_location,
213 "last stmt in pattern. don't mark"
214 " relevant/live.\n");
215 stmt_info = vinfo_for_stmt (pattern_stmt);
216 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
217 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
218 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
219 stmt = pattern_stmt;
222 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
223 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
224 STMT_VINFO_RELEVANT (stmt_info) = relevant;
226 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
227 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
229 if (dump_enabled_p ())
230 dump_printf_loc (MSG_NOTE, vect_location,
231 "already marked relevant/live.\n");
232 return;
235 worklist->safe_push (stmt);
239 /* Function vect_stmt_relevant_p.
241 Return true if STMT in loop that is represented by LOOP_VINFO is
242 "relevant for vectorization".
244 A stmt is considered "relevant for vectorization" if:
245 - it has uses outside the loop.
246 - it has vdefs (it alters memory).
247 - it is a control stmt in the loop (except for the exit condition).
249 CHECKME: what other side effects would the vectorizer allow? */
251 static bool
252 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
253 enum vect_relevant *relevant, bool *live_p)
255 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
256 ssa_op_iter op_iter;
257 imm_use_iterator imm_iter;
258 use_operand_p use_p;
259 def_operand_p def_p;
261 *relevant = vect_unused_in_scope;
262 *live_p = false;
264 /* cond stmt other than loop exit cond. */
265 if (is_ctrl_stmt (stmt)
266 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
267 != loop_exit_ctrl_vec_info_type)
268 *relevant = vect_used_in_scope;
270 /* changing memory. */
271 if (gimple_code (stmt) != GIMPLE_PHI)
272 if (gimple_vdef (stmt)
273 && !gimple_clobber_p (stmt))
275 if (dump_enabled_p ())
276 dump_printf_loc (MSG_NOTE, vect_location,
277 "vec_stmt_relevant_p: stmt has vdefs.\n");
278 *relevant = vect_used_in_scope;
281 /* uses outside the loop. */
282 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
284 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
286 basic_block bb = gimple_bb (USE_STMT (use_p));
287 if (!flow_bb_inside_loop_p (loop, bb))
289 if (dump_enabled_p ())
290 dump_printf_loc (MSG_NOTE, vect_location,
291 "vec_stmt_relevant_p: used out of loop.\n");
293 if (is_gimple_debug (USE_STMT (use_p)))
294 continue;
296 /* We expect all such uses to be in the loop exit phis
297 (because of loop-closed SSA form). */
298 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
299 gcc_assert (bb == single_exit (loop)->dest);
301 *live_p = true;
306 return (*live_p || *relevant);
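  /* Note (added): this relies on vect_unused_in_scope being the zero value
     of enum vect_relevant, so *RELEVANT can be used as a boolean here.  */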
310 /* Function exist_non_indexing_operands_for_use_p
312 USE is one of the uses attached to STMT. Check if USE is
313 used in STMT for anything other than indexing an array. */
315 static bool
316 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
318 tree operand;
319 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
321 /* USE corresponds to some operand in STMT. If there is no data
322 reference in STMT, then any operand that corresponds to USE
323 is not indexing an array. */
324 if (!STMT_VINFO_DATA_REF (stmt_info))
325 return true;
327 /* STMT has a data_ref. FORNOW this means that it is of one of
328 the following forms:
329 -1- ARRAY_REF = var
330 -2- var = ARRAY_REF
331 (This should have been verified in analyze_data_refs).
333 'var' in the second case corresponds to a def, not a use,
334 so USE cannot correspond to any operands that are not used
335 for array indexing.
337 Therefore, all we need to check is if STMT falls into the
338 first case, and whether var corresponds to USE. */
340 if (!gimple_assign_copy_p (stmt))
342 if (is_gimple_call (stmt)
343 && gimple_call_internal_p (stmt))
344 switch (gimple_call_internal_fn (stmt))
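	/* Added note: for IFN_MASK_LOAD the mask is call argument 2;
	   IFN_MASK_STORE additionally passes the stored value as argument 3.
	   Those are the only call operands not used for address computation.  */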
346 case IFN_MASK_STORE:
347 operand = gimple_call_arg (stmt, 3);
348 if (operand == use)
349 return true;
350 /* FALLTHRU */
351 case IFN_MASK_LOAD:
352 operand = gimple_call_arg (stmt, 2);
353 if (operand == use)
354 return true;
355 break;
356 default:
357 break;
359 return false;
362 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
363 return false;
364 operand = gimple_assign_rhs1 (stmt);
365 if (TREE_CODE (operand) != SSA_NAME)
366 return false;
368 if (operand == use)
369 return true;
371 return false;
376 Function process_use.
378 Inputs:
379 - a USE in STMT in a loop represented by LOOP_VINFO
380 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
381 that defined USE. This is done by calling mark_relevant and passing it
382 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
383 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
384 be performed.
386 Outputs:
387 Generally, LIVE_P and RELEVANT are used to define the liveness and
388 relevance info of the DEF_STMT of this USE:
389 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
390 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
391 Exceptions:
392 - case 1: If USE is used only for address computations (e.g. array indexing),
393 which does not need to be directly vectorized, then the liveness/relevance
394 of the respective DEF_STMT is left unchanged.
395 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
396 skip DEF_STMT because it has already been processed.
397 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
398 be modified accordingly.
400 Return true if everything is as expected. Return false otherwise. */
402 static bool
403 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
404 enum vect_relevant relevant, vec<gimple *> *worklist,
405 bool force)
407 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
408 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
409 stmt_vec_info dstmt_vinfo;
410 basic_block bb, def_bb;
411 gimple *def_stmt;
412 enum vect_def_type dt;
414 /* case 1: we are only interested in uses that need to be vectorized. Uses
415 that are used for address computation are not considered relevant. */
416 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
417 return true;
419 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
421 if (dump_enabled_p ())
422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
423 "not vectorized: unsupported use in stmt.\n");
424 return false;
427 if (!def_stmt || gimple_nop_p (def_stmt))
428 return true;
430 def_bb = gimple_bb (def_stmt);
431 if (!flow_bb_inside_loop_p (loop, def_bb))
433 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
435 return true;
438 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
439 DEF_STMT must have already been processed, because this should be the
440 only way that STMT, which is a reduction-phi, was put in the worklist,
441 as there should be no other uses for DEF_STMT in the loop. So we just
442 check that everything is as expected, and we are done. */
443 dstmt_vinfo = vinfo_for_stmt (def_stmt);
444 bb = gimple_bb (stmt);
445 if (gimple_code (stmt) == GIMPLE_PHI
446 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
447 && gimple_code (def_stmt) != GIMPLE_PHI
448 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
449 && bb->loop_father == def_bb->loop_father)
451 if (dump_enabled_p ())
452 dump_printf_loc (MSG_NOTE, vect_location,
453 "reduc-stmt defining reduc-phi in the same nest.\n");
454 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
455 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
456 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
457 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
458 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
459 return true;
462 /* case 3a: outer-loop stmt defining an inner-loop stmt:
463 outer-loop-header-bb:
464 d = def_stmt
465 inner-loop:
466 stmt # use (d)
467 outer-loop-tail-bb:
468 ... */
469 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_NOTE, vect_location,
473 "outer-loop def-stmt defining inner-loop stmt.\n");
475 switch (relevant)
477 case vect_unused_in_scope:
478 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
479 vect_used_in_scope : vect_unused_in_scope;
480 break;
482 case vect_used_in_outer_by_reduction:
483 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
484 relevant = vect_used_by_reduction;
485 break;
487 case vect_used_in_outer:
488 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
489 relevant = vect_used_in_scope;
490 break;
492 case vect_used_in_scope:
493 break;
495 default:
496 gcc_unreachable ();
500 /* case 3b: inner-loop stmt defining an outer-loop stmt:
501 outer-loop-header-bb:
503 inner-loop:
504 d = def_stmt
505 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
506 stmt # use (d) */
507 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
509 if (dump_enabled_p ())
510 dump_printf_loc (MSG_NOTE, vect_location,
511 "inner-loop def-stmt defining outer-loop stmt.\n");
513 switch (relevant)
515 case vect_unused_in_scope:
516 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
517 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
518 vect_used_in_outer_by_reduction : vect_unused_in_scope;
519 break;
521 case vect_used_by_reduction:
522 relevant = vect_used_in_outer_by_reduction;
523 break;
525 case vect_used_in_scope:
526 relevant = vect_used_in_outer;
527 break;
529 default:
530 gcc_unreachable ();
534 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
535 return true;
539 /* Function vect_mark_stmts_to_be_vectorized.
541 Not all stmts in the loop need to be vectorized. For example:
543 for i...
544 for j...
545 1. T0 = i + j
546 2. T1 = a[T0]
548 3. j = j + 1
550 Stmts 1 and 3 do not need to be vectorized, because loop control and
551 addressing of vectorized data-refs are handled differently.
553 This pass detects such stmts. */
555 bool
556 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
558 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
559 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
560 unsigned int nbbs = loop->num_nodes;
561 gimple_stmt_iterator si;
562 gimple *stmt;
563 unsigned int i;
564 stmt_vec_info stmt_vinfo;
565 basic_block bb;
566 gimple *phi;
567 bool live_p;
568 enum vect_relevant relevant, tmp_relevant;
569 enum vect_def_type def_type;
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location,
573 "=== vect_mark_stmts_to_be_vectorized ===\n");
575 auto_vec<gimple *, 64> worklist;
577 /* 1. Init worklist. */
578 for (i = 0; i < nbbs; i++)
580 bb = bbs[i];
581 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
583 phi = gsi_stmt (si);
584 if (dump_enabled_p ())
586 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
587 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
590 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
591 vect_mark_relevant (&worklist, phi, relevant, live_p);
593 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
595 stmt = gsi_stmt (si);
596 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
602 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
603 vect_mark_relevant (&worklist, stmt, relevant, live_p);
607 /* 2. Process_worklist */
608 while (worklist.length () > 0)
610 use_operand_p use_p;
611 ssa_op_iter iter;
613 stmt = worklist.pop ();
614 if (dump_enabled_p ())
616 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
617 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
620 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
621 (DEF_STMT) as relevant/irrelevant and live/dead according to the
622 liveness and relevance properties of STMT. */
623 stmt_vinfo = vinfo_for_stmt (stmt);
624 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
625 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
627 /* Generally, the liveness and relevance properties of STMT are
628 propagated as is to the DEF_STMTs of its USEs:
629 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
630 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
632 One exception is when STMT has been identified as defining a reduction
633 variable; in this case we set the liveness/relevance as follows:
634 live_p = false
635 relevant = vect_used_by_reduction
636 This is because we distinguish between two kinds of relevant stmts -
637 those that are used by a reduction computation, and those that are
638 (also) used by a regular computation. This allows us later on to
639 identify stmts that are used solely by a reduction, and therefore the
640 order of the results that they produce does not have to be kept. */
642 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
643 tmp_relevant = relevant;
644 switch (def_type)
646 case vect_reduction_def:
647 switch (tmp_relevant)
649 case vect_unused_in_scope:
650 relevant = vect_used_by_reduction;
651 break;
653 case vect_used_by_reduction:
654 if (gimple_code (stmt) == GIMPLE_PHI)
655 break;
656 /* fall through */
658 default:
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
661 "unsupported use of reduction.\n");
662 return false;
665 live_p = false;
666 break;
668 case vect_nested_cycle:
669 if (tmp_relevant != vect_unused_in_scope
670 && tmp_relevant != vect_used_in_outer_by_reduction
671 && tmp_relevant != vect_used_in_outer)
673 if (dump_enabled_p ())
674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
675 "unsupported use of nested cycle.\n");
677 return false;
680 live_p = false;
681 break;
683 case vect_double_reduction_def:
684 if (tmp_relevant != vect_unused_in_scope
685 && tmp_relevant != vect_used_by_reduction)
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "unsupported use of double reduction.\n");
691 return false;
694 live_p = false;
695 break;
697 default:
698 break;
701 if (is_pattern_stmt_p (stmt_vinfo))
703 /* Pattern statements are not inserted into the code, so
704 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
705 have to scan the RHS or function arguments instead. */
706 if (is_gimple_assign (stmt))
708 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
709 tree op = gimple_assign_rhs1 (stmt);
711 i = 1;
712 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
714 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
715 live_p, relevant, &worklist, false)
716 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
717 live_p, relevant, &worklist, false))
718 return false;
719 i = 2;
721 for (; i < gimple_num_ops (stmt); i++)
723 op = gimple_op (stmt, i);
724 if (TREE_CODE (op) == SSA_NAME
725 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
726 &worklist, false))
727 return false;
730 else if (is_gimple_call (stmt))
732 for (i = 0; i < gimple_call_num_args (stmt); i++)
734 tree arg = gimple_call_arg (stmt, i);
735 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
736 &worklist, false))
737 return false;
741 else
742 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
744 tree op = USE_FROM_PTR (use_p);
745 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
746 &worklist, false))
747 return false;
750 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
752 tree off;
753 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
754 gcc_assert (decl);
755 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
756 &worklist, true))
757 return false;
759 } /* while worklist */
761 return true;
765 /* Function vect_model_simple_cost.
767 Models cost for simple operations, i.e. those that only emit ncopies of a
768 single op. Right now, this does not account for multiple insns that could
769 be generated for the single vector op. We will handle that shortly. */
771 void
772 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
773 enum vect_def_type *dt,
774 stmt_vector_for_cost *prologue_cost_vec,
775 stmt_vector_for_cost *body_cost_vec)
777 int i;
778 int inside_cost = 0, prologue_cost = 0;
780 /* The SLP costs were already calculated during SLP tree build. */
781 if (PURE_SLP_STMT (stmt_info))
782 return;
784 /* FORNOW: Assuming maximum 2 args per stmt. */
785 for (i = 0; i < 2; i++)
786 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
787 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
788 stmt_info, 0, vect_prologue);
790 /* Pass the inside-of-loop statements to the target-specific cost model. */
791 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
792 stmt_info, 0, vect_body);
794 if (dump_enabled_p ())
795 dump_printf_loc (MSG_NOTE, vect_location,
796 "vect_model_simple_cost: inside_cost = %d, "
797 "prologue_cost = %d .\n", inside_cost, prologue_cost);
801 /* Model cost for type demotion and promotion operations. PWR is normally
802 zero for single-step promotions and demotions. It will be one if
803 two-step promotion/demotion is required, and so on. Each additional
804 step doubles the number of instructions required. */
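/* Worked example (added, assuming vect_pow2 (x) == 2**x): with PWR == 1 a
   promotion costs vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote
   stmts below, while the corresponding demotion costs
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2.  */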
806 static void
807 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
808 enum vect_def_type *dt, int pwr)
810 int i, tmp;
811 int inside_cost = 0, prologue_cost = 0;
812 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
813 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
814 void *target_cost_data;
816 /* The SLP costs were already calculated during SLP tree build. */
817 if (PURE_SLP_STMT (stmt_info))
818 return;
820 if (loop_vinfo)
821 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
822 else
823 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
825 for (i = 0; i < pwr + 1; i++)
827 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
828 (i + 1) : i;
829 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
830 vec_promote_demote, stmt_info, 0,
831 vect_body);
834 /* FORNOW: Assuming maximum 2 args per stmt. */
835 for (i = 0; i < 2; i++)
836 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
837 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
838 stmt_info, 0, vect_prologue);
840 if (dump_enabled_p ())
841 dump_printf_loc (MSG_NOTE, vect_location,
842 "vect_model_promotion_demotion_cost: inside_cost = %d, "
843 "prologue_cost = %d .\n", inside_cost, prologue_cost);
846 /* Function vect_cost_group_size
848 For grouped load or store, return the group_size only if it is the first
849 load or store of a group, else return 1. This ensures that group size is
850 only returned once per group. */
852 static int
853 vect_cost_group_size (stmt_vec_info stmt_info)
855 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
857 if (first_stmt == STMT_VINFO_STMT (stmt_info))
858 return GROUP_SIZE (stmt_info);
860 return 1;
864 /* Function vect_model_store_cost
866 Models cost for stores. In the case of grouped accesses, one access
867 has the overhead of the grouped access attributed to it. */
869 void
870 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
871 bool store_lanes_p, enum vect_def_type dt,
872 slp_tree slp_node,
873 stmt_vector_for_cost *prologue_cost_vec,
874 stmt_vector_for_cost *body_cost_vec)
876 int group_size;
877 unsigned int inside_cost = 0, prologue_cost = 0;
878 struct data_reference *first_dr;
879 gimple *first_stmt;
881 if (dt == vect_constant_def || dt == vect_external_def)
882 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
883 stmt_info, 0, vect_prologue);
885 /* Grouped access? */
886 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
888 if (slp_node)
890 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
891 group_size = 1;
893 else
895 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
896 group_size = vect_cost_group_size (stmt_info);
899 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
901 /* Not a grouped access. */
902 else
904 group_size = 1;
905 first_dr = STMT_VINFO_DATA_REF (stmt_info);
908 /* We assume that the cost of a single store-lanes instruction is
909 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
910 access is instead being provided by a permute-and-store operation,
911 include the cost of the permutes. */
912 if (!store_lanes_p && group_size > 1
913 && !STMT_VINFO_STRIDED_P (stmt_info))
915 /* Uses high and low interleave or shuffle operations for each
916 needed permute. */
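      /* Added note: e.g. for a group of 4 vectors this assumes
	 ceil_log2 (4) * 4 = 8 permute stmts per copy, a rough upper bound
	 on the size of the interleaving network.  */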
917 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
918 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
919 stmt_info, 0, vect_body);
921 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE, vect_location,
923 "vect_model_store_cost: strided group_size = %d .\n",
924 group_size);
927 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928 /* Costs of the stores. */
929 if (STMT_VINFO_STRIDED_P (stmt_info)
930 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
932 /* N scalar stores plus extracting the elements. */
933 inside_cost += record_stmt_cost (body_cost_vec,
934 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
935 scalar_store, stmt_info, 0, vect_body);
937 else
938 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
940 if (STMT_VINFO_STRIDED_P (stmt_info))
941 inside_cost += record_stmt_cost (body_cost_vec,
942 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
943 vec_to_scalar, stmt_info, 0, vect_body);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_store_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
952 /* Calculate cost of DR's memory access. */
953 void
954 vect_get_store_cost (struct data_reference *dr, int ncopies,
955 unsigned int *inside_cost,
956 stmt_vector_for_cost *body_cost_vec)
958 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
959 gimple *stmt = DR_STMT (dr);
960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 DR_MISALIGNMENT (dr), vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1005 /* Function vect_model_load_cost
1007 Models cost for loads. In the case of grouped accesses, the last access
1008 has the overhead of the grouped access attributed to it. Since unaligned
1009 accesses are supported for loads, we also account for the costs of the
1010 access scheme chosen. */
1012 void
1013 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1014 bool load_lanes_p, slp_tree slp_node,
1015 stmt_vector_for_cost *prologue_cost_vec,
1016 stmt_vector_for_cost *body_cost_vec)
1018 int group_size;
1019 gimple *first_stmt;
1020 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1021 unsigned int inside_cost = 0, prologue_cost = 0;
1023 /* Grouped accesses? */
1024 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1025 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1027 group_size = vect_cost_group_size (stmt_info);
1028 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1030 /* Not a grouped access. */
1031 else
1033 group_size = 1;
1034 first_dr = dr;
1037 /* We assume that the cost of a single load-lanes instruction is
1038 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1039 access is instead being provided by a load-and-permute operation,
1040 include the cost of the permutes. */
1041 if (!load_lanes_p && group_size > 1
1042 && !STMT_VINFO_STRIDED_P (stmt_info))
1044 /* Uses even and odd extract operations or shuffle operations
1045 for each needed permute. */
1046 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1047 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1048 stmt_info, 0, vect_body);
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE, vect_location,
1052 "vect_model_load_cost: strided group_size = %d .\n",
1053 group_size);
1056 /* The loads themselves. */
1057 if (STMT_VINFO_STRIDED_P (stmt_info)
1058 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1060 /* N scalar loads plus gathering them into a vector. */
1061 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1062 inside_cost += record_stmt_cost (body_cost_vec,
1063 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1064 scalar_load, stmt_info, 0, vect_body);
1066 else
1067 vect_get_load_cost (first_dr, ncopies,
1068 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1069 || group_size > 1 || slp_node),
1070 &inside_cost, &prologue_cost,
1071 prologue_cost_vec, body_cost_vec, true);
1072 if (STMT_VINFO_STRIDED_P (stmt_info))
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1084 void
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1098 case dr_aligned:
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1107 break;
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1119 "hardware.\n");
1121 break;
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1132 prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1141 break;
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1148 "pipelined.\n");
1150 /* An unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1161 0, vect_prologue);
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1165 0, vect_prologue);
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1176 "\n");
1178 break;
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1188 break;
1191 default:
1192 gcc_unreachable ();
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1199 static void
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 if (gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1204 else
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1209 if (loop_vinfo)
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 basic_block new_bb;
1213 edge pe;
1215 if (nested_in_vect_loop_p (loop, stmt))
1216 loop = loop->inner;
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1222 else
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 basic_block bb;
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 vector type, a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
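/* Illustrative example (added): passing the scalar constant 5 with a
   four-element integer vector TYPE emits something like
     cst_1 = { 5, 5, 5, 5 };
   in the loop preheader (or at GSI) and returns cst_1.  */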
1253 tree
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 gimple *init_stmt;
1257 tree new_temp;
1259 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1265 /* A scalar boolean value should be transformed into
1266 an all-zeros or all-ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1274 else
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1280 val = new_temp;
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1285 else
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1291 TREE_TYPE (type),
1292 val));
1293 else
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1296 val = new_temp;
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 return new_temp;
1309 /* Function vect_get_vec_def_for_operand.
1311 OP is an operand in STMT. This function returns a (vector) def that will be
1312 used in the vectorized stmt for STMT.
1314 In the case that OP is an SSA_NAME which is defined in the loop, then
1315 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1317 In case OP is an invariant or constant, a new stmt that creates a vector def
1318 needs to be introduced. VECTYPE may be used to specify a required type for
1319 vector invariant. */
1321 tree
1322 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1324 tree vec_oprnd;
1325 gimple *vec_stmt;
1326 gimple *def_stmt;
1327 stmt_vec_info def_stmt_info = NULL;
1328 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1329 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1330 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1331 enum vect_def_type dt;
1332 bool is_simple_use;
1333 tree vector_type;
1335 if (dump_enabled_p ())
1337 dump_printf_loc (MSG_NOTE, vect_location,
1338 "vect_get_vec_def_for_operand: ");
1339 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1340 dump_printf (MSG_NOTE, "\n");
1343 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1344 gcc_assert (is_simple_use);
1345 if (dump_enabled_p ())
1347 int loc_printed = 0;
1348 if (def_stmt)
1350 if (loc_printed)
1351 dump_printf (MSG_NOTE, " def_stmt = ");
1352 else
1353 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1354 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1358 switch (dt)
1360 /* operand is a constant or a loop invariant. */
1361 case vect_constant_def:
1362 case vect_external_def:
1364 if (vectype)
1365 vector_type = vectype;
1366 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1367 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1368 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1369 else
1370 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1372 gcc_assert (vector_type);
1373 return vect_init_vector (stmt, op, vector_type, NULL);
1376 /* operand is defined inside the loop. */
1377 case vect_internal_def:
1379 /* Get the def from the vectorized stmt. */
1380 def_stmt_info = vinfo_for_stmt (def_stmt);
1382 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1383 /* Get vectorized pattern statement. */
1384 if (!vec_stmt
1385 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1386 && !STMT_VINFO_RELEVANT (def_stmt_info))
1387 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1388 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1389 gcc_assert (vec_stmt);
1390 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 vec_oprnd = PHI_RESULT (vec_stmt);
1392 else if (is_gimple_call (vec_stmt))
1393 vec_oprnd = gimple_call_lhs (vec_stmt);
1394 else
1395 vec_oprnd = gimple_assign_lhs (vec_stmt);
1396 return vec_oprnd;
1399 /* operand is defined by a loop header phi - reduction */
1400 case vect_reduction_def:
1401 case vect_double_reduction_def:
1402 case vect_nested_cycle:
1403 /* Code should use get_initial_def_for_reduction. */
1404 gcc_unreachable ();
1406 /* operand is defined by loop-header phi - induction. */
1407 case vect_induction_def:
1409 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1411 /* Get the def from the vectorized stmt. */
1412 def_stmt_info = vinfo_for_stmt (def_stmt);
1413 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1414 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1415 vec_oprnd = PHI_RESULT (vec_stmt);
1416 else
1417 vec_oprnd = gimple_get_lhs (vec_stmt);
1418 return vec_oprnd;
1421 default:
1422 gcc_unreachable ();
1427 /* Function vect_get_vec_def_for_stmt_copy
1429 Return a vector-def for an operand. This function is used when the
1430 vectorized stmt to be created (by the caller to this function) is a "copy"
1431 created in case the vectorized result cannot fit in one vector, and several
1432 copies of the vector-stmt are required. In this case the vector-def is
1433 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1434 of the stmt that defines VEC_OPRND.
1435 DT is the type of the vector def VEC_OPRND.
1437 Context:
1438 In case the vectorization factor (VF) is bigger than the number
1439 of elements that can fit in a vectype (nunits), we have to generate
1440 more than one vector stmt to vectorize the scalar stmt. This situation
1441 arises when there are multiple data-types operated upon in the loop; the
1442 smallest data-type determines the VF, and as a result, when vectorizing
1443 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1444 vector stmt (each computing a vector of 'nunits' results, and together
1445 computing 'VF' results in each iteration). This function is called when
1446 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1447 which VF=16 and nunits=4, so the number of copies required is 4):
1449 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1451 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1452 VS1.1: vx.1 = memref1 VS1.2
1453 VS1.2: vx.2 = memref2 VS1.3
1454 VS1.3: vx.3 = memref3
1456 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1457 VSnew.1: vz1 = vx.1 + ... VSnew.2
1458 VSnew.2: vz2 = vx.2 + ... VSnew.3
1459 VSnew.3: vz3 = vx.3 + ...
1461 The vectorization of S1 is explained in vectorizable_load.
1462 The vectorization of S2:
1463 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1464 the function 'vect_get_vec_def_for_operand' is called to
1465 get the relevant vector-def for each operand of S2. For operand x it
1466 returns the vector-def 'vx.0'.
1468 To create the remaining copies of the vector-stmt (VSnew.j), this
1469 function is called to get the relevant vector-def for each operand. It is
1470 obtained from the respective VS1.j stmt, which is recorded in the
1471 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1473 For example, to obtain the vector-def 'vx.1' in order to create the
1474 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1475 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1476 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1477 and return its def ('vx.1').
1478 Overall, to create the above sequence this function will be called 3 times:
1479 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1480 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1481 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1483 tree
1484 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1486 gimple *vec_stmt_for_operand;
1487 stmt_vec_info def_stmt_info;
1489 /* Do nothing; can reuse same def. */
1490 if (dt == vect_external_def || dt == vect_constant_def )
1491 return vec_oprnd;
1493 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1494 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1495 gcc_assert (def_stmt_info);
1496 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1497 gcc_assert (vec_stmt_for_operand);
1498 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1499 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1500 else
1501 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1502 return vec_oprnd;
1506 /* Get vectorized definitions for the operands to create a copy of an original
1507 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1509 static void
1510 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1511 vec<tree> *vec_oprnds0,
1512 vec<tree> *vec_oprnds1)
1514 tree vec_oprnd = vec_oprnds0->pop ();
1516 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1517 vec_oprnds0->quick_push (vec_oprnd);
1519 if (vec_oprnds1 && vec_oprnds1->length ())
1521 vec_oprnd = vec_oprnds1->pop ();
1522 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1523 vec_oprnds1->quick_push (vec_oprnd);
1528 /* Get vectorized definitions for OP0 and OP1.
1529 REDUC_INDEX is the index of reduction operand in case of reduction,
1530 and -1 otherwise. */
1532 void
1533 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1534 vec<tree> *vec_oprnds0,
1535 vec<tree> *vec_oprnds1,
1536 slp_tree slp_node, int reduc_index)
1538 if (slp_node)
1540 int nops = (op1 == NULL_TREE) ? 1 : 2;
1541 auto_vec<tree> ops (nops);
1542 auto_vec<vec<tree> > vec_defs (nops);
1544 ops.quick_push (op0);
1545 if (op1)
1546 ops.quick_push (op1);
1548 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1550 *vec_oprnds0 = vec_defs[0];
1551 if (op1)
1552 *vec_oprnds1 = vec_defs[1];
1554 else
1556 tree vec_oprnd;
1558 vec_oprnds0->create (1);
1559 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1560 vec_oprnds0->quick_push (vec_oprnd);
1562 if (op1)
1564 vec_oprnds1->create (1);
1565 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1566 vec_oprnds1->quick_push (vec_oprnd);
1572 /* Function vect_finish_stmt_generation.
1574 Insert a new stmt. */
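/* Added note: VEC_STMT is inserted before *GSI; it inherits STMT's location
   and EH region, and if it is a store its virtual operands are wired up so
   that the SSA updater does not need to run (see the body below).  */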
1576 void
1577 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1578 gimple_stmt_iterator *gsi)
1580 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1581 vec_info *vinfo = stmt_info->vinfo;
1583 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1585 if (!gsi_end_p (*gsi)
1586 && gimple_has_mem_ops (vec_stmt))
1588 gimple *at_stmt = gsi_stmt (*gsi);
1589 tree vuse = gimple_vuse (at_stmt);
1590 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1592 tree vdef = gimple_vdef (at_stmt);
1593 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1594 /* If we have an SSA vuse and insert a store, update virtual
1595 SSA form to avoid triggering the renamer. Do so only
1596 if we can easily see all uses - which is what almost always
1597 happens with the way vectorized stmts are inserted. */
1598 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1599 && ((is_gimple_assign (vec_stmt)
1600 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1601 || (is_gimple_call (vec_stmt)
1602 && !(gimple_call_flags (vec_stmt)
1603 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1605 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1606 gimple_set_vdef (vec_stmt, new_vdef);
1607 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1611 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1613 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1615 if (dump_enabled_p ())
1617 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1621 gimple_set_location (vec_stmt, gimple_location (stmt));
1623 /* While EH edges will generally prevent vectorization, stmt might
1624 e.g. be in a must-not-throw region. Ensure newly created stmts
1625 that could throw are part of the same region. */
1626 int lp_nr = lookup_stmt_eh_lp (stmt);
1627 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1631 /* We want to vectorize a call to combined function CFN with function
1632 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1633 as the types of all inputs. Check whether this is possible using
1634 an internal function, returning its code if so or IFN_LAST if not. */
1636 static internal_fn
1637 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1638 tree vectype_out, tree vectype_in)
1640 internal_fn ifn;
1641 if (internal_fn_p (cfn))
1642 ifn = as_internal_fn (cfn);
1643 else
1644 ifn = associated_internal_fn (fndecl);
1645 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1647 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1648 if (info.vectorizable)
1650 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1651 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1652 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1653 OPTIMIZE_FOR_SPEED))
1654 return ifn;
1657 return IFN_LAST;
1661 static tree permute_vec_elements (tree, tree, tree, gimple *,
1662 gimple_stmt_iterator *);
1665 /* Function vectorizable_mask_load_store.
1667 Check if STMT performs a conditional load or store that can be vectorized.
1668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1669 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1670 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1672 static bool
1673 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1674 gimple **vec_stmt, slp_tree slp_node)
1676 tree vec_dest = NULL;
1677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678 stmt_vec_info prev_stmt_info;
1679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1680 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1681 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1682 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1683 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1684 tree rhs_vectype = NULL_TREE;
1685 tree mask_vectype;
1686 tree elem_type;
1687 gimple *new_stmt;
1688 tree dummy;
1689 tree dataref_ptr = NULL_TREE;
1690 gimple *ptr_incr;
1691 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1692 int ncopies;
1693 int i, j;
1694 bool inv_p;
1695 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1696 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1697 int gather_scale = 1;
1698 enum vect_def_type gather_dt = vect_unknown_def_type;
1699 bool is_store;
1700 tree mask;
1701 gimple *def_stmt;
1702 enum vect_def_type dt;
1704 if (slp_node != NULL)
1705 return false;
1707 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1708 gcc_assert (ncopies >= 1);
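  /* Added note: NCOPIES is how many vector stmts are needed to cover one
     iteration's worth of scalar results, i.e. the vectorization factor
     divided by the number of elements per vector.  */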
1710 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1711 mask = gimple_call_arg (stmt, 2);
1713 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1714 return false;
1716 /* FORNOW. This restriction should be relaxed. */
1717 if (nested_in_vect_loop && ncopies > 1)
1719 if (dump_enabled_p ())
1720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1721 "multiple types in nested loop.");
1722 return false;
1725 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1726 return false;
1728 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1729 && ! vec_stmt)
1730 return false;
1732 if (!STMT_VINFO_DATA_REF (stmt_info))
1733 return false;
1735 elem_type = TREE_TYPE (vectype);
1737 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1738 return false;
1740 if (STMT_VINFO_STRIDED_P (stmt_info))
1741 return false;
1743 if (TREE_CODE (mask) != SSA_NAME)
1744 return false;
1746 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1747 return false;
1749 if (!mask_vectype)
1750 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1752 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
1753 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
1754 return false;
1756 if (is_store)
1758 tree rhs = gimple_call_arg (stmt, 3);
1759 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1760 return false;
1763 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1765 gimple *def_stmt;
1766 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1767 &gather_off, &gather_scale);
1768 gcc_assert (gather_decl);
1769 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1770 &gather_off_vectype))
1772 if (dump_enabled_p ())
1773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1774 "gather index use not simple.");
1775 return false;
1778 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1779 tree masktype
1780 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1781 if (TREE_CODE (masktype) == INTEGER_TYPE)
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1785 "masked gather with integer mask not supported.");
1786 return false;
1789 else if (tree_int_cst_compare (nested_in_vect_loop
1790 ? STMT_VINFO_DR_STEP (stmt_info)
1791 : DR_STEP (dr), size_zero_node) <= 0)
1792 return false;
1793 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1794 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1795 TYPE_MODE (mask_vectype),
1796 !is_store)
1797 || (rhs_vectype
1798 && !useless_type_conversion_p (vectype, rhs_vectype)))
1799 return false;
1801 if (!vec_stmt) /* transformation not required. */
1803 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1804 if (is_store)
1805 vect_model_store_cost (stmt_info, ncopies, false, dt,
1806 NULL, NULL, NULL);
1807 else
1808 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1809 return true;
1812 /** Transform. **/
1814 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1816 tree vec_oprnd0 = NULL_TREE, op;
1817 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1818 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1819 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1820 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1821 tree mask_perm_mask = NULL_TREE;
1822 edge pe = loop_preheader_edge (loop);
1823 gimple_seq seq;
1824 basic_block new_bb;
1825 enum { NARROW, NONE, WIDEN } modifier;
1826 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1828 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1829 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1830 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1831 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1832 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1833 scaletype = TREE_VALUE (arglist);
1834 gcc_checking_assert (types_compatible_p (srctype, rettype)
1835 && types_compatible_p (srctype, masktype));
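          /* Relate the number of elements in the data vector (NUNITS) to the
             number in the gather offset vector: equal counts need no fixup,
             while a 2x difference is handled by the WIDEN or NARROW
             permutations set up below.  */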
1837 if (nunits == gather_off_nunits)
1838 modifier = NONE;
1839 else if (nunits == gather_off_nunits / 2)
1841 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1842 modifier = WIDEN;
1844 for (i = 0; i < gather_off_nunits; ++i)
1845 sel[i] = i | nunits;
1847 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1849 else if (nunits == gather_off_nunits * 2)
1851 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1852 modifier = NARROW;
1854 for (i = 0; i < nunits; ++i)
1855 sel[i] = i < gather_off_nunits
1856 ? i : i + nunits - gather_off_nunits;
1858 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1859 ncopies *= 2;
1860 for (i = 0; i < nunits; ++i)
1861 sel[i] = i | gather_off_nunits;
1862 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1864 else
1865 gcc_unreachable ();
1867 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1869 ptr = fold_convert (ptrtype, gather_base);
1870 if (!is_gimple_min_invariant (ptr))
1872 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1873 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1874 gcc_assert (!new_bb);
1877 scale = build_int_cst (scaletype, gather_scale);
1879 prev_stmt_info = NULL;
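      /* Main transformation loop for the gather case: emit one masked gather
         builtin call per copy, view-converting the offset and mask operands
         to the builtin's argument types where necessary.  */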
1880 for (j = 0; j < ncopies; ++j)
1882 if (modifier == WIDEN && (j & 1))
1883 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1884 perm_mask, stmt, gsi);
1885 else if (j == 0)
1886 op = vec_oprnd0
1887 = vect_get_vec_def_for_operand (gather_off, stmt);
1888 else
1889 op = vec_oprnd0
1890 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1892 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1894 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1895 == TYPE_VECTOR_SUBPARTS (idxtype));
1896 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1897 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1898 new_stmt
1899 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1900 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1901 op = var;
1904 if (mask_perm_mask && (j & 1))
1905 mask_op = permute_vec_elements (mask_op, mask_op,
1906 mask_perm_mask, stmt, gsi);
1907 else
1909 if (j == 0)
1910 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1911 else
1913 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1914 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1917 mask_op = vec_mask;
1918 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1920 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1921 == TYPE_VECTOR_SUBPARTS (masktype));
1922 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1923 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1924 new_stmt
1925 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1926 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1927 mask_op = var;
1931 new_stmt
1932 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1933 scale);
1935 if (!useless_type_conversion_p (vectype, rettype))
1937 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1938 == TYPE_VECTOR_SUBPARTS (rettype));
1939 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1940 gimple_call_set_lhs (new_stmt, op);
1941 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1942 var = make_ssa_name (vec_dest);
1943 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1944 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1946 else
1948 var = make_ssa_name (vec_dest, new_stmt);
1949 gimple_call_set_lhs (new_stmt, var);
1952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1954 if (modifier == NARROW)
1956 if ((j & 1) == 0)
1958 prev_res = var;
1959 continue;
1961 var = permute_vec_elements (prev_res, var,
1962 perm_mask, stmt, gsi);
1963 new_stmt = SSA_NAME_DEF_STMT (var);
1966 if (prev_stmt_info == NULL)
1967 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1968 else
1969 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1970 prev_stmt_info = vinfo_for_stmt (new_stmt);
1973 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
1974 from the IL. */
1975 if (STMT_VINFO_RELATED_STMT (stmt_info))
1977 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1978 stmt_info = vinfo_for_stmt (stmt);
1980 tree lhs = gimple_call_lhs (stmt);
1981 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
1982 set_vinfo_for_stmt (new_stmt, stmt_info);
1983 set_vinfo_for_stmt (stmt, NULL);
1984 STMT_VINFO_STMT (stmt_info) = new_stmt;
1985 gsi_replace (gsi, new_stmt, true);
1986 return true;
1988 else if (is_store)
1990 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
1991 prev_stmt_info = NULL;
1992 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
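      /* Emit one IFN_MASK_STORE per copy, advancing the data-ref pointer
         between copies and carrying the rhs and mask defs forward with
         vect_get_vec_def_for_stmt_copy.  */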
1993 for (i = 0; i < ncopies; i++)
1995 unsigned align, misalign;
1997 if (i == 0)
1999 tree rhs = gimple_call_arg (stmt, 3);
2000 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2001 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2002 /* We should have caught mismatched types earlier. */
2003 gcc_assert (useless_type_conversion_p (vectype,
2004 TREE_TYPE (vec_rhs)));
2005 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2006 NULL_TREE, &dummy, gsi,
2007 &ptr_incr, false, &inv_p);
2008 gcc_assert (!inv_p);
2010 else
2012 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2013 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2014 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2015 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2016 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2017 TYPE_SIZE_UNIT (vectype));
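          /* Compute the alignment that may be assumed for this access and
             record it both on the pointer and in the second call argument
             (the largest power of two dividing the misalignment, or the full
             alignment if the access is known to be aligned).  */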
2020 align = TYPE_ALIGN_UNIT (vectype);
2021 if (aligned_access_p (dr))
2022 misalign = 0;
2023 else if (DR_MISALIGNMENT (dr) == -1)
2025 align = TYPE_ALIGN_UNIT (elem_type);
2026 misalign = 0;
2028 else
2029 misalign = DR_MISALIGNMENT (dr);
2030 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2031 misalign);
2032 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2033 misalign ? misalign & -misalign : align);
2034 new_stmt
2035 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2036 ptr, vec_mask, vec_rhs);
2037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2038 if (i == 0)
2039 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2040 else
2041 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2042 prev_stmt_info = vinfo_for_stmt (new_stmt);
2045 else
2047 tree vec_mask = NULL_TREE;
2048 prev_stmt_info = NULL;
2049 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
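      /* Likewise for loads: emit one IFN_MASK_LOAD per copy and chain the
         generated statements through STMT_VINFO_RELATED_STMT.  */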
2050 for (i = 0; i < ncopies; i++)
2052 unsigned align, misalign;
2054 if (i == 0)
2056 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2057 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2058 NULL_TREE, &dummy, gsi,
2059 &ptr_incr, false, &inv_p);
2060 gcc_assert (!inv_p);
2062 else
2064 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2065 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2066 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2067 TYPE_SIZE_UNIT (vectype));
2070 align = TYPE_ALIGN_UNIT (vectype);
2071 if (aligned_access_p (dr))
2072 misalign = 0;
2073 else if (DR_MISALIGNMENT (dr) == -1)
2075 align = TYPE_ALIGN_UNIT (elem_type);
2076 misalign = 0;
2078 else
2079 misalign = DR_MISALIGNMENT (dr);
2080 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2081 misalign);
2082 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2083 misalign ? misalign & -misalign : align);
2084 new_stmt
2085 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2086 ptr, vec_mask);
2087 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2089 if (i == 0)
2090 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2091 else
2092 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2093 prev_stmt_info = vinfo_for_stmt (new_stmt);
2097 if (!is_store)
2099 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2100 from the IL. */
2101 if (STMT_VINFO_RELATED_STMT (stmt_info))
2103 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2104 stmt_info = vinfo_for_stmt (stmt);
2106 tree lhs = gimple_call_lhs (stmt);
2107 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2108 set_vinfo_for_stmt (new_stmt, stmt_info);
2109 set_vinfo_for_stmt (stmt, NULL);
2110 STMT_VINFO_STMT (stmt_info) = new_stmt;
2111 gsi_replace (gsi, new_stmt, true);
2114 return true;
2117 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2118 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2119 in a single step. On success, store the binary pack code in
2120 *CONVERT_CODE. */
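   /* For instance, a conversion that packs two input vectors into one output
      vector with a single VEC_PACK_TRUNC_EXPR qualifies; anything that would
      need an intermediate type (MULTI_STEP_CVT != 0) is rejected.  */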
2122 static bool
2123 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2124 tree_code *convert_code)
2126 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2127 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2128 return false;
2130 tree_code code;
2131 int multi_step_cvt = 0;
2132 auto_vec <tree, 8> interm_types;
2133 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2134 &code, &multi_step_cvt,
2135 &interm_types)
2136 || multi_step_cvt)
2137 return false;
2139 *convert_code = code;
2140 return true;
2143 /* Function vectorizable_call.
2145 Check if GS performs a function call that can be vectorized.
2146 Check if GS performs a function call that can be vectorized.
2147 If VEC_STMT is also passed, vectorize GS: create a vectorized
2148 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2148 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2150 static bool
2151 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2152 slp_tree slp_node)
2154 gcall *stmt;
2155 tree vec_dest;
2156 tree scalar_dest;
2157 tree op, type;
2158 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2159 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2160 tree vectype_out, vectype_in;
2161 int nunits_in;
2162 int nunits_out;
2163 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2164 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2165 vec_info *vinfo = stmt_info->vinfo;
2166 tree fndecl, new_temp, rhs_type;
2167 gimple *def_stmt;
2168 enum vect_def_type dt[3]
2169 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2170 gimple *new_stmt = NULL;
2171 int ncopies, j;
2172 vec<tree> vargs = vNULL;
2173 enum { NARROW, NONE, WIDEN } modifier;
2174 size_t i, nargs;
2175 tree lhs;
2177 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2178 return false;
2180 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2181 && ! vec_stmt)
2182 return false;
2184 /* Is GS a vectorizable call? */
2185 stmt = dyn_cast <gcall *> (gs);
2186 if (!stmt)
2187 return false;
2189 if (gimple_call_internal_p (stmt)
2190 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2191 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2192 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2193 slp_node);
2195 if (gimple_call_lhs (stmt) == NULL_TREE
2196 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2197 return false;
2199 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2201 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2203 /* Process function arguments. */
2204 rhs_type = NULL_TREE;
2205 vectype_in = NULL_TREE;
2206 nargs = gimple_call_num_args (stmt);
2208 /* Bail out if the function has more than three arguments; we do not have
2209 interesting builtin functions to vectorize with more than two arguments
2210 except for fma. A call with no arguments is not interesting either. */
2211 if (nargs == 0 || nargs > 3)
2212 return false;
2214 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2215 if (gimple_call_internal_p (stmt)
2216 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2218 nargs = 0;
2219 rhs_type = unsigned_type_node;
2222 for (i = 0; i < nargs; i++)
2224 tree opvectype;
2226 op = gimple_call_arg (stmt, i);
2228 /* We can only handle calls with arguments of the same type. */
2229 if (rhs_type
2230 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2232 if (dump_enabled_p ())
2233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2234 "argument types differ.\n");
2235 return false;
2237 if (!rhs_type)
2238 rhs_type = TREE_TYPE (op);
2240 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 "use not simple.\n");
2245 return false;
2248 if (!vectype_in)
2249 vectype_in = opvectype;
2250 else if (opvectype
2251 && opvectype != vectype_in)
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "argument vector types differ.\n");
2256 return false;
2259 /* If all arguments are external or constant defs, use a vector type with
2260 the same size as the output vector type. */
2261 if (!vectype_in)
2262 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2263 if (vec_stmt)
2264 gcc_assert (vectype_in);
2265 if (!vectype_in)
2267 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2270 "no vectype for scalar type ");
2271 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2272 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2275 return false;
2278 /* FORNOW */
2279 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2280 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2281 if (nunits_in == nunits_out / 2)
2282 modifier = NARROW;
2283 else if (nunits_out == nunits_in)
2284 modifier = NONE;
2285 else if (nunits_out == nunits_in / 2)
2286 modifier = WIDEN;
2287 else
2288 return false;
2290 /* We only handle functions that do not read or clobber memory. */
2291 if (gimple_vuse (stmt))
2293 if (dump_enabled_p ())
2294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2295 "function reads from or writes to memory.\n");
2296 return false;
2299 /* For now, we only vectorize functions if a target specific builtin
2300 is available. TODO -- in some cases, it might be profitable to
2301 insert the calls for pieces of the vector, in order to be able
2302 to vectorize other operations in the loop. */
2303 fndecl = NULL_TREE;
2304 internal_fn ifn = IFN_LAST;
2305 combined_fn cfn = gimple_call_combined_fn (stmt);
2306 tree callee = gimple_call_fndecl (stmt);
2308 /* First try using an internal function. */
2309 tree_code convert_code = ERROR_MARK;
2310 if (cfn != CFN_LAST
2311 && (modifier == NONE
2312 || (modifier == NARROW
2313 && simple_integer_narrowing (vectype_out, vectype_in,
2314 &convert_code))))
2315 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2316 vectype_in);
2318 /* If that fails, try asking for a target-specific built-in function. */
2319 if (ifn == IFN_LAST)
2321 if (cfn != CFN_LAST)
2322 fndecl = targetm.vectorize.builtin_vectorized_function
2323 (cfn, vectype_out, vectype_in);
2324 else
2325 fndecl = targetm.vectorize.builtin_md_vectorized_function
2326 (callee, vectype_out, vectype_in);
2329 if (ifn == IFN_LAST && !fndecl)
2331 if (cfn == CFN_GOMP_SIMD_LANE
2332 && !slp_node
2333 && loop_vinfo
2334 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2335 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2336 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2337 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2339 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2340 { 0, 1, 2, ... vf - 1 } vector. */
2341 gcc_assert (nargs == 0);
2343 else
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "function is not vectorizable.\n");
2348 return false;
2352 if (slp_node || PURE_SLP_STMT (stmt_info))
2353 ncopies = 1;
2354 else if (modifier == NARROW && ifn == IFN_LAST)
2355 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2356 else
2357 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2359 /* Sanity check: make sure that at least one copy of the vectorized stmt
2360 needs to be generated. */
2361 gcc_assert (ncopies >= 1);
2363 if (!vec_stmt) /* transformation not required. */
2365 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2368 "\n");
2369 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2370 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2371 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2372 vec_promote_demote, stmt_info, 0, vect_body);
2374 return true;
2377 /** Transform. **/
2379 if (dump_enabled_p ())
2380 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2382 /* Handle def. */
2383 scalar_dest = gimple_call_lhs (stmt);
2384 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2386 prev_stmt_info = NULL;
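  /* Case 1: the call is vectorized copy by copy, either with no width change
     or with an internal function whose narrowed halves are combined with
     CONVERT_CODE every second copy.  */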
2387 if (modifier == NONE || ifn != IFN_LAST)
2389 tree prev_res = NULL_TREE;
2390 for (j = 0; j < ncopies; ++j)
2392 /* Build argument list for the vectorized call. */
2393 if (j == 0)
2394 vargs.create (nargs);
2395 else
2396 vargs.truncate (0);
2398 if (slp_node)
2400 auto_vec<vec<tree> > vec_defs (nargs);
2401 vec<tree> vec_oprnds0;
2403 for (i = 0; i < nargs; i++)
2404 vargs.quick_push (gimple_call_arg (stmt, i));
2405 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2406 vec_oprnds0 = vec_defs[0];
2408 /* Arguments are ready. Create the new vector stmt. */
2409 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2411 size_t k;
2412 for (k = 0; k < nargs; k++)
2414 vec<tree> vec_oprndsk = vec_defs[k];
2415 vargs[k] = vec_oprndsk[i];
2417 if (modifier == NARROW)
2419 tree half_res = make_ssa_name (vectype_in);
2420 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2421 gimple_call_set_lhs (new_stmt, half_res);
2422 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2423 if ((i & 1) == 0)
2425 prev_res = half_res;
2426 continue;
2428 new_temp = make_ssa_name (vec_dest);
2429 new_stmt = gimple_build_assign (new_temp, convert_code,
2430 prev_res, half_res);
2432 else
2434 if (ifn != IFN_LAST)
2435 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2436 else
2437 new_stmt = gimple_build_call_vec (fndecl, vargs);
2438 new_temp = make_ssa_name (vec_dest, new_stmt);
2439 gimple_call_set_lhs (new_stmt, new_temp);
2441 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2442 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2445 for (i = 0; i < nargs; i++)
2447 vec<tree> vec_oprndsi = vec_defs[i];
2448 vec_oprndsi.release ();
2450 continue;
2453 for (i = 0; i < nargs; i++)
2455 op = gimple_call_arg (stmt, i);
2456 if (j == 0)
2457 vec_oprnd0
2458 = vect_get_vec_def_for_operand (op, stmt);
2459 else
2461 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2462 vec_oprnd0
2463 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2466 vargs.quick_push (vec_oprnd0);
2469 if (gimple_call_internal_p (stmt)
2470 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2472 tree *v = XALLOCAVEC (tree, nunits_out);
2473 int k;
2474 for (k = 0; k < nunits_out; ++k)
2475 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2476 tree cst = build_vector (vectype_out, v);
2477 tree new_var
2478 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2479 gimple *init_stmt = gimple_build_assign (new_var, cst);
2480 vect_init_vector_1 (stmt, init_stmt, NULL);
2481 new_temp = make_ssa_name (vec_dest);
2482 new_stmt = gimple_build_assign (new_temp, new_var);
2484 else if (modifier == NARROW)
2486 tree half_res = make_ssa_name (vectype_in);
2487 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2488 gimple_call_set_lhs (new_stmt, half_res);
2489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2490 if ((j & 1) == 0)
2492 prev_res = half_res;
2493 continue;
2495 new_temp = make_ssa_name (vec_dest);
2496 new_stmt = gimple_build_assign (new_temp, convert_code,
2497 prev_res, half_res);
2499 else
2501 if (ifn != IFN_LAST)
2502 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2503 else
2504 new_stmt = gimple_build_call_vec (fndecl, vargs);
2505 new_temp = make_ssa_name (vec_dest, new_stmt);
2506 gimple_call_set_lhs (new_stmt, new_temp);
2508 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2510 if (j == (modifier == NARROW ? 1 : 0))
2511 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2512 else
2513 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2515 prev_stmt_info = vinfo_for_stmt (new_stmt);
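  /* Case 2: narrowing without a suitable internal function; each vectorized
     call consumes two vector defs per argument, so the argument list is
     built from pairs of defs.  */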
2518 else if (modifier == NARROW)
2520 for (j = 0; j < ncopies; ++j)
2522 /* Build argument list for the vectorized call. */
2523 if (j == 0)
2524 vargs.create (nargs * 2);
2525 else
2526 vargs.truncate (0);
2528 if (slp_node)
2530 auto_vec<vec<tree> > vec_defs (nargs);
2531 vec<tree> vec_oprnds0;
2533 for (i = 0; i < nargs; i++)
2534 vargs.quick_push (gimple_call_arg (stmt, i));
2535 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2536 vec_oprnds0 = vec_defs[0];
2538 /* Arguments are ready. Create the new vector stmt. */
2539 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2541 size_t k;
2542 vargs.truncate (0);
2543 for (k = 0; k < nargs; k++)
2545 vec<tree> vec_oprndsk = vec_defs[k];
2546 vargs.quick_push (vec_oprndsk[i]);
2547 vargs.quick_push (vec_oprndsk[i + 1]);
2549 if (ifn != IFN_LAST)
2550 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2551 else
2552 new_stmt = gimple_build_call_vec (fndecl, vargs);
2553 new_temp = make_ssa_name (vec_dest, new_stmt);
2554 gimple_call_set_lhs (new_stmt, new_temp);
2555 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2556 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2559 for (i = 0; i < nargs; i++)
2561 vec<tree> vec_oprndsi = vec_defs[i];
2562 vec_oprndsi.release ();
2564 continue;
2567 for (i = 0; i < nargs; i++)
2569 op = gimple_call_arg (stmt, i);
2570 if (j == 0)
2572 vec_oprnd0
2573 = vect_get_vec_def_for_operand (op, stmt);
2574 vec_oprnd1
2575 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2577 else
2579 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2580 vec_oprnd0
2581 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2582 vec_oprnd1
2583 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2586 vargs.quick_push (vec_oprnd0);
2587 vargs.quick_push (vec_oprnd1);
2590 new_stmt = gimple_build_call_vec (fndecl, vargs);
2591 new_temp = make_ssa_name (vec_dest, new_stmt);
2592 gimple_call_set_lhs (new_stmt, new_temp);
2593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2595 if (j == 0)
2596 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2597 else
2598 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2600 prev_stmt_info = vinfo_for_stmt (new_stmt);
2603 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2605 else
2606 /* No current target implements this case. */
2607 return false;
2609 vargs.release ();
2611 /* The call in STMT might prevent it from being removed in DCE.
2612 We however cannot remove it here, due to the way the SSA name
2613 it defines is mapped to the new definition. So just replace the
2614 rhs of the statement with something harmless. */
2616 if (slp_node)
2617 return true;
2619 type = TREE_TYPE (scalar_dest);
2620 if (is_pattern_stmt_p (stmt_info))
2621 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2622 else
2623 lhs = gimple_call_lhs (stmt);
2625 if (gimple_call_internal_p (stmt)
2626 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2628 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2629 with vf - 1 rather than 0, that is, the last iteration of the
2630 vectorized loop. */
2631 imm_use_iterator iter;
2632 use_operand_p use_p;
2633 gimple *use_stmt;
2634 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2636 basic_block use_bb = gimple_bb (use_stmt);
2637 if (use_bb
2638 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2640 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2641 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2642 ncopies * nunits_out - 1));
2643 update_stmt (use_stmt);
2648 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2649 set_vinfo_for_stmt (new_stmt, stmt_info);
2650 set_vinfo_for_stmt (stmt, NULL);
2651 STMT_VINFO_STMT (stmt_info) = new_stmt;
2652 gsi_replace (gsi, new_stmt, false);
2654 return true;
2658 struct simd_call_arg_info
2660 tree vectype;
2661 tree op;
2662 enum vect_def_type dt;
2663 HOST_WIDE_INT linear_step;
2664 unsigned int align;
2665 bool simd_lane_linear;
2668 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2669 is linear within a simd lane (but not within the whole loop), note it in
2670 *ARGINFO. */
2672 static void
2673 vect_simd_lane_linear (tree op, struct loop *loop,
2674 struct simd_call_arg_info *arginfo)
2676 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2678 if (!is_gimple_assign (def_stmt)
2679 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2680 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2681 return;
2683 tree base = gimple_assign_rhs1 (def_stmt);
2684 HOST_WIDE_INT linear_step = 0;
2685 tree v = gimple_assign_rhs2 (def_stmt);
2686 while (TREE_CODE (v) == SSA_NAME)
2688 tree t;
2689 def_stmt = SSA_NAME_DEF_STMT (v);
2690 if (is_gimple_assign (def_stmt))
2691 switch (gimple_assign_rhs_code (def_stmt))
2693 case PLUS_EXPR:
2694 t = gimple_assign_rhs2 (def_stmt);
2695 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2696 return;
2697 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2698 v = gimple_assign_rhs1 (def_stmt);
2699 continue;
2700 case MULT_EXPR:
2701 t = gimple_assign_rhs2 (def_stmt);
2702 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2703 return;
2704 linear_step = tree_to_shwi (t);
2705 v = gimple_assign_rhs1 (def_stmt);
2706 continue;
2707 CASE_CONVERT:
2708 t = gimple_assign_rhs1 (def_stmt);
2709 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2710 || (TYPE_PRECISION (TREE_TYPE (v))
2711 < TYPE_PRECISION (TREE_TYPE (t))))
2712 return;
2713 if (!linear_step)
2714 linear_step = 1;
2715 v = t;
2716 continue;
2717 default:
2718 return;
2720 else if (is_gimple_call (def_stmt)
2721 && gimple_call_internal_p (def_stmt)
2722 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2723 && loop->simduid
2724 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2725 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2726 == loop->simduid))
2728 if (!linear_step)
2729 linear_step = 1;
2730 arginfo->linear_step = linear_step;
2731 arginfo->op = base;
2732 arginfo->simd_lane_linear = true;
2733 return;
2738 /* Function vectorizable_simd_clone_call.
2740 Check if STMT performs a function call that can be vectorized
2741 by calling a simd clone of the function.
2742 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2743 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2744 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2746 static bool
2747 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2748 gimple **vec_stmt, slp_tree slp_node)
2750 tree vec_dest;
2751 tree scalar_dest;
2752 tree op, type;
2753 tree vec_oprnd0 = NULL_TREE;
2754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2755 tree vectype;
2756 unsigned int nunits;
2757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2759 vec_info *vinfo = stmt_info->vinfo;
2760 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2761 tree fndecl, new_temp;
2762 gimple *def_stmt;
2763 gimple *new_stmt = NULL;
2764 int ncopies, j;
2765 vec<simd_call_arg_info> arginfo = vNULL;
2766 vec<tree> vargs = vNULL;
2767 size_t i, nargs;
2768 tree lhs, rtype, ratype;
2769 vec<constructor_elt, va_gc> *ret_ctor_elts;
2771 /* Is STMT a vectorizable call? */
2772 if (!is_gimple_call (stmt))
2773 return false;
2775 fndecl = gimple_call_fndecl (stmt);
2776 if (fndecl == NULL_TREE)
2777 return false;
2779 struct cgraph_node *node = cgraph_node::get (fndecl);
2780 if (node == NULL || node->simd_clones == NULL)
2781 return false;
2783 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2784 return false;
2786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2787 && ! vec_stmt)
2788 return false;
2790 if (gimple_call_lhs (stmt)
2791 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2792 return false;
2794 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2796 vectype = STMT_VINFO_VECTYPE (stmt_info);
2798 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2799 return false;
2801 /* FORNOW */
2802 if (slp_node || PURE_SLP_STMT (stmt_info))
2803 return false;
2805 /* Process function arguments. */
2806 nargs = gimple_call_num_args (stmt);
2808 /* Bail out if the function has zero arguments. */
2809 if (nargs == 0)
2810 return false;
2812 arginfo.create (nargs);
2814 for (i = 0; i < nargs; i++)
2816 simd_call_arg_info thisarginfo;
2817 affine_iv iv;
2819 thisarginfo.linear_step = 0;
2820 thisarginfo.align = 0;
2821 thisarginfo.op = NULL_TREE;
2822 thisarginfo.simd_lane_linear = false;
2824 op = gimple_call_arg (stmt, i);
2825 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2826 &thisarginfo.vectype)
2827 || thisarginfo.dt == vect_uninitialized_def)
2829 if (dump_enabled_p ())
2830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2831 "use not simple.\n");
2832 arginfo.release ();
2833 return false;
2836 if (thisarginfo.dt == vect_constant_def
2837 || thisarginfo.dt == vect_external_def)
2838 gcc_assert (thisarginfo.vectype == NULL_TREE);
2839 else
2840 gcc_assert (thisarginfo.vectype != NULL_TREE);
2842 /* For linear arguments, the analysis phase should have saved
2843 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2844 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2845 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2847 gcc_assert (vec_stmt);
2848 thisarginfo.linear_step
2849 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2850 thisarginfo.op
2851 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2852 thisarginfo.simd_lane_linear
2853 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2854 == boolean_true_node);
2855 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
2856 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2857 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2858 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2860 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2861 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2862 tree opt = TREE_TYPE (thisarginfo.op);
2863 bias = fold_convert (TREE_TYPE (step), bias);
2864 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2865 thisarginfo.op
2866 = fold_build2 (POINTER_TYPE_P (opt)
2867 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2868 thisarginfo.op, bias);
2871 else if (!vec_stmt
2872 && thisarginfo.dt != vect_constant_def
2873 && thisarginfo.dt != vect_external_def
2874 && loop_vinfo
2875 && TREE_CODE (op) == SSA_NAME
2876 && simple_iv (loop, loop_containing_stmt (stmt), op,
2877 &iv, false)
2878 && tree_fits_shwi_p (iv.step))
2880 thisarginfo.linear_step = tree_to_shwi (iv.step);
2881 thisarginfo.op = iv.base;
2883 else if ((thisarginfo.dt == vect_constant_def
2884 || thisarginfo.dt == vect_external_def)
2885 && POINTER_TYPE_P (TREE_TYPE (op)))
2886 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2887 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2888 linear too. */
2889 if (POINTER_TYPE_P (TREE_TYPE (op))
2890 && !thisarginfo.linear_step
2891 && !vec_stmt
2892 && thisarginfo.dt != vect_constant_def
2893 && thisarginfo.dt != vect_external_def
2894 && loop_vinfo
2895 && !slp_node
2896 && TREE_CODE (op) == SSA_NAME)
2897 vect_simd_lane_linear (op, loop, &thisarginfo);
2899 arginfo.quick_push (thisarginfo);
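  /* Choose the simd clone to use: skip clones whose simdlen exceeds the
     vectorization factor, whose arguments do not match, or that are unusable
     on the target, and among the remaining candidates pick the one with the
     smallest badness score.  */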
2902 unsigned int badness = 0;
2903 struct cgraph_node *bestn = NULL;
2904 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2905 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2906 else
2907 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2908 n = n->simdclone->next_clone)
2910 unsigned int this_badness = 0;
2911 if (n->simdclone->simdlen
2912 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2913 || n->simdclone->nargs != nargs)
2914 continue;
2915 if (n->simdclone->simdlen
2916 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2917 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2918 - exact_log2 (n->simdclone->simdlen)) * 1024;
2919 if (n->simdclone->inbranch)
2920 this_badness += 2048;
2921 int target_badness = targetm.simd_clone.usable (n);
2922 if (target_badness < 0)
2923 continue;
2924 this_badness += target_badness * 512;
2925 /* FORNOW: Have to add code to add the mask argument. */
2926 if (n->simdclone->inbranch)
2927 continue;
2928 for (i = 0; i < nargs; i++)
2930 switch (n->simdclone->args[i].arg_type)
2932 case SIMD_CLONE_ARG_TYPE_VECTOR:
2933 if (!useless_type_conversion_p
2934 (n->simdclone->args[i].orig_type,
2935 TREE_TYPE (gimple_call_arg (stmt, i))))
2936 i = -1;
2937 else if (arginfo[i].dt == vect_constant_def
2938 || arginfo[i].dt == vect_external_def
2939 || arginfo[i].linear_step)
2940 this_badness += 64;
2941 break;
2942 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2943 if (arginfo[i].dt != vect_constant_def
2944 && arginfo[i].dt != vect_external_def)
2945 i = -1;
2946 break;
2947 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2948 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2949 if (arginfo[i].dt == vect_constant_def
2950 || arginfo[i].dt == vect_external_def
2951 || (arginfo[i].linear_step
2952 != n->simdclone->args[i].linear_step))
2953 i = -1;
2954 break;
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2957 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2958 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2959 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2960 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2961 /* FORNOW */
2962 i = -1;
2963 break;
2964 case SIMD_CLONE_ARG_TYPE_MASK:
2965 gcc_unreachable ();
2967 if (i == (size_t) -1)
2968 break;
2969 if (n->simdclone->args[i].alignment > arginfo[i].align)
2971 i = -1;
2972 break;
2974 if (arginfo[i].align)
2975 this_badness += (exact_log2 (arginfo[i].align)
2976 - exact_log2 (n->simdclone->args[i].alignment));
2978 if (i == (size_t) -1)
2979 continue;
2980 if (bestn == NULL || this_badness < badness)
2982 bestn = n;
2983 badness = this_badness;
2987 if (bestn == NULL)
2989 arginfo.release ();
2990 return false;
2993 for (i = 0; i < nargs; i++)
2994 if ((arginfo[i].dt == vect_constant_def
2995 || arginfo[i].dt == vect_external_def)
2996 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2998 arginfo[i].vectype
2999 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3000 i)));
3001 if (arginfo[i].vectype == NULL
3002 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3003 > bestn->simdclone->simdlen))
3005 arginfo.release ();
3006 return false;
3010 fndecl = bestn->decl;
3011 nunits = bestn->simdclone->simdlen;
3012 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3014 /* If the function isn't const, only allow it in simd loops where the user
3015 has asserted that at least nunits consecutive iterations can be
3016 performed using SIMD instructions. */
3017 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3018 && gimple_vuse (stmt))
3020 arginfo.release ();
3021 return false;
3024 /* Sanity check: make sure that at least one copy of the vectorized stmt
3025 needs to be generated. */
3026 gcc_assert (ncopies >= 1);
3028 if (!vec_stmt) /* transformation not required. */
3030 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3031 for (i = 0; i < nargs; i++)
3032 if ((bestn->simdclone->args[i].arg_type
3033 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3034 || (bestn->simdclone->args[i].arg_type
3035 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3037 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3038 + 1);
3039 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3040 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3041 ? size_type_node : TREE_TYPE (arginfo[i].op);
3042 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3043 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3044 tree sll = arginfo[i].simd_lane_linear
3045 ? boolean_true_node : boolean_false_node;
3046 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3048 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3049 if (dump_enabled_p ())
3050 dump_printf_loc (MSG_NOTE, vect_location,
3051 "=== vectorizable_simd_clone_call ===\n");
3052 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3053 arginfo.release ();
3054 return true;
3057 /** Transform. **/
3059 if (dump_enabled_p ())
3060 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3062 /* Handle def. */
3063 scalar_dest = gimple_call_lhs (stmt);
3064 vec_dest = NULL_TREE;
3065 rtype = NULL_TREE;
3066 ratype = NULL_TREE;
3067 if (scalar_dest)
3069 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3070 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3071 if (TREE_CODE (rtype) == ARRAY_TYPE)
3073 ratype = rtype;
3074 rtype = TREE_TYPE (ratype);
3078 prev_stmt_info = NULL;
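  /* Generate NCOPIES calls to the chosen clone.  Vector arguments are split
     or concatenated when the clone's argument type has a different number of
     elements than the loop vectype; constant-step linear arguments get a new
     induction variable on the first copy (unless they are simd-lane linear)
     and are offset by J * NUNITS * STEP on later copies.  */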
3079 for (j = 0; j < ncopies; ++j)
3081 /* Build argument list for the vectorized call. */
3082 if (j == 0)
3083 vargs.create (nargs);
3084 else
3085 vargs.truncate (0);
3087 for (i = 0; i < nargs; i++)
3089 unsigned int k, l, m, o;
3090 tree atype;
3091 op = gimple_call_arg (stmt, i);
3092 switch (bestn->simdclone->args[i].arg_type)
3094 case SIMD_CLONE_ARG_TYPE_VECTOR:
3095 atype = bestn->simdclone->args[i].vector_type;
3096 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3097 for (m = j * o; m < (j + 1) * o; m++)
3099 if (TYPE_VECTOR_SUBPARTS (atype)
3100 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3102 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3103 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3104 / TYPE_VECTOR_SUBPARTS (atype));
3105 gcc_assert ((k & (k - 1)) == 0);
3106 if (m == 0)
3107 vec_oprnd0
3108 = vect_get_vec_def_for_operand (op, stmt);
3109 else
3111 vec_oprnd0 = arginfo[i].op;
3112 if ((m & (k - 1)) == 0)
3113 vec_oprnd0
3114 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3115 vec_oprnd0);
3117 arginfo[i].op = vec_oprnd0;
3118 vec_oprnd0
3119 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3120 size_int (prec),
3121 bitsize_int ((m & (k - 1)) * prec));
3122 new_stmt
3123 = gimple_build_assign (make_ssa_name (atype),
3124 vec_oprnd0);
3125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3126 vargs.safe_push (gimple_assign_lhs (new_stmt));
3128 else
3130 k = (TYPE_VECTOR_SUBPARTS (atype)
3131 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3132 gcc_assert ((k & (k - 1)) == 0);
3133 vec<constructor_elt, va_gc> *ctor_elts;
3134 if (k != 1)
3135 vec_alloc (ctor_elts, k);
3136 else
3137 ctor_elts = NULL;
3138 for (l = 0; l < k; l++)
3140 if (m == 0 && l == 0)
3141 vec_oprnd0
3142 = vect_get_vec_def_for_operand (op, stmt);
3143 else
3144 vec_oprnd0
3145 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3146 arginfo[i].op);
3147 arginfo[i].op = vec_oprnd0;
3148 if (k == 1)
3149 break;
3150 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3151 vec_oprnd0);
3153 if (k == 1)
3154 vargs.safe_push (vec_oprnd0);
3155 else
3157 vec_oprnd0 = build_constructor (atype, ctor_elts);
3158 new_stmt
3159 = gimple_build_assign (make_ssa_name (atype),
3160 vec_oprnd0);
3161 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3162 vargs.safe_push (gimple_assign_lhs (new_stmt));
3166 break;
3167 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3168 vargs.safe_push (op);
3169 break;
3170 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3171 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3172 if (j == 0)
3174 gimple_seq stmts;
3175 arginfo[i].op
3176 = force_gimple_operand (arginfo[i].op, &stmts, true,
3177 NULL_TREE);
3178 if (stmts != NULL)
3180 basic_block new_bb;
3181 edge pe = loop_preheader_edge (loop);
3182 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3183 gcc_assert (!new_bb);
3185 if (arginfo[i].simd_lane_linear)
3187 vargs.safe_push (arginfo[i].op);
3188 break;
3190 tree phi_res = copy_ssa_name (op);
3191 gphi *new_phi = create_phi_node (phi_res, loop->header);
3192 set_vinfo_for_stmt (new_phi,
3193 new_stmt_vec_info (new_phi, loop_vinfo));
3194 add_phi_arg (new_phi, arginfo[i].op,
3195 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3196 enum tree_code code
3197 = POINTER_TYPE_P (TREE_TYPE (op))
3198 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3199 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3200 ? sizetype : TREE_TYPE (op);
3201 widest_int cst
3202 = wi::mul (bestn->simdclone->args[i].linear_step,
3203 ncopies * nunits);
3204 tree tcst = wide_int_to_tree (type, cst);
3205 tree phi_arg = copy_ssa_name (op);
3206 new_stmt
3207 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3208 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3209 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3210 set_vinfo_for_stmt (new_stmt,
3211 new_stmt_vec_info (new_stmt, loop_vinfo));
3212 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3213 UNKNOWN_LOCATION);
3214 arginfo[i].op = phi_res;
3215 vargs.safe_push (phi_res);
3217 else
3219 enum tree_code code
3220 = POINTER_TYPE_P (TREE_TYPE (op))
3221 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3222 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3223 ? sizetype : TREE_TYPE (op);
3224 widest_int cst
3225 = wi::mul (bestn->simdclone->args[i].linear_step,
3226 j * nunits);
3227 tree tcst = wide_int_to_tree (type, cst);
3228 new_temp = make_ssa_name (TREE_TYPE (op));
3229 new_stmt = gimple_build_assign (new_temp, code,
3230 arginfo[i].op, tcst);
3231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3232 vargs.safe_push (new_temp);
3234 break;
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3239 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3240 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3241 default:
3242 gcc_unreachable ();
3246 new_stmt = gimple_build_call_vec (fndecl, vargs);
3247 if (vec_dest)
3249 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3250 if (ratype)
3251 new_temp = create_tmp_var (ratype);
3252 else if (TYPE_VECTOR_SUBPARTS (vectype)
3253 == TYPE_VECTOR_SUBPARTS (rtype))
3254 new_temp = make_ssa_name (vec_dest, new_stmt);
3255 else
3256 new_temp = make_ssa_name (rtype, new_stmt);
3257 gimple_call_set_lhs (new_stmt, new_temp);
3259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3261 if (vec_dest)
3263 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3265 unsigned int k, l;
3266 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3267 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3268 gcc_assert ((k & (k - 1)) == 0);
3269 for (l = 0; l < k; l++)
3271 tree t;
3272 if (ratype)
3274 t = build_fold_addr_expr (new_temp);
3275 t = build2 (MEM_REF, vectype, t,
3276 build_int_cst (TREE_TYPE (t),
3277 l * prec / BITS_PER_UNIT));
3279 else
3280 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3281 size_int (prec), bitsize_int (l * prec));
3282 new_stmt
3283 = gimple_build_assign (make_ssa_name (vectype), t);
3284 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3285 if (j == 0 && l == 0)
3286 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3287 else
3288 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3290 prev_stmt_info = vinfo_for_stmt (new_stmt);
3293 if (ratype)
3295 tree clobber = build_constructor (ratype, NULL);
3296 TREE_THIS_VOLATILE (clobber) = 1;
3297 new_stmt = gimple_build_assign (new_temp, clobber);
3298 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3300 continue;
3302 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3304 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3305 / TYPE_VECTOR_SUBPARTS (rtype));
3306 gcc_assert ((k & (k - 1)) == 0);
3307 if ((j & (k - 1)) == 0)
3308 vec_alloc (ret_ctor_elts, k);
3309 if (ratype)
3311 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3312 for (m = 0; m < o; m++)
3314 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3315 size_int (m), NULL_TREE, NULL_TREE);
3316 new_stmt
3317 = gimple_build_assign (make_ssa_name (rtype), tem);
3318 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3319 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3320 gimple_assign_lhs (new_stmt));
3322 tree clobber = build_constructor (ratype, NULL);
3323 TREE_THIS_VOLATILE (clobber) = 1;
3324 new_stmt = gimple_build_assign (new_temp, clobber);
3325 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3327 else
3328 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3329 if ((j & (k - 1)) != k - 1)
3330 continue;
3331 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3332 new_stmt
3333 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3334 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3336 if ((unsigned) j == k - 1)
3337 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3338 else
3339 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3341 prev_stmt_info = vinfo_for_stmt (new_stmt);
3342 continue;
3344 else if (ratype)
3346 tree t = build_fold_addr_expr (new_temp);
3347 t = build2 (MEM_REF, vectype, t,
3348 build_int_cst (TREE_TYPE (t), 0));
3349 new_stmt
3350 = gimple_build_assign (make_ssa_name (vec_dest), t);
3351 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3352 tree clobber = build_constructor (ratype, NULL);
3353 TREE_THIS_VOLATILE (clobber) = 1;
3354 vect_finish_stmt_generation (stmt,
3355 gimple_build_assign (new_temp,
3356 clobber), gsi);
3360 if (j == 0)
3361 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3362 else
3363 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3365 prev_stmt_info = vinfo_for_stmt (new_stmt);
3368 vargs.release ();
3370 /* The call in STMT might prevent it from being removed in DCE.
3371 We however cannot remove it here, due to the way the SSA name
3372 it defines is mapped to the new definition. So just replace the
3373 rhs of the statement with something harmless. */
3375 if (slp_node)
3376 return true;
3378 if (scalar_dest)
3380 type = TREE_TYPE (scalar_dest);
3381 if (is_pattern_stmt_p (stmt_info))
3382 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3383 else
3384 lhs = gimple_call_lhs (stmt);
3385 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3387 else
3388 new_stmt = gimple_build_nop ();
3389 set_vinfo_for_stmt (new_stmt, stmt_info);
3390 set_vinfo_for_stmt (stmt, NULL);
3391 STMT_VINFO_STMT (stmt_info) = new_stmt;
3392 gsi_replace (gsi, new_stmt, true);
3393 unlink_stmt_vdef (stmt);
3395 return true;
3399 /* Function vect_gen_widened_results_half
3401 Create a vector stmt whose code, type, number of arguments, and result
3402 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3403 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3404 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3405 needs to be created (DECL is a function-decl of a target-builtin).
3406 STMT is the original scalar stmt that we are vectorizing. */
3408 static gimple *
3409 vect_gen_widened_results_half (enum tree_code code,
3410 tree decl,
3411 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3412 tree vec_dest, gimple_stmt_iterator *gsi,
3413 gimple *stmt)
3415 gimple *new_stmt;
3416 tree new_temp;
3418 /* Generate half of the widened result: */
3419 if (code == CALL_EXPR)
3421 /* Target specific support */
3422 if (op_type == binary_op)
3423 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3424 else
3425 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3426 new_temp = make_ssa_name (vec_dest, new_stmt);
3427 gimple_call_set_lhs (new_stmt, new_temp);
3429 else
3431 /* Generic support */
3432 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3433 if (op_type != binary_op)
3434 vec_oprnd1 = NULL;
3435 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3436 new_temp = make_ssa_name (vec_dest, new_stmt);
3437 gimple_assign_set_lhs (new_stmt, new_temp);
3439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3441 return new_stmt;
3445 /* Get vectorized definitions for loop-based vectorization. For the first
3446 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3447 scalar operand), and for the rest we get a copy with
3448 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3449 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3450 The vectors are collected into VEC_OPRNDS. */
3452 static void
3453 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3454 vec<tree> *vec_oprnds, int multi_step_cvt)
3456 tree vec_oprnd;
3458 /* Get first vector operand. */
3459 /* All the vector operands except the very first one (the scalar operand)
3460 are stmt copies. */
3461 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3462 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3463 else
3464 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3466 vec_oprnds->quick_push (vec_oprnd);
3468 /* Get second vector operand. */
3469 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3470 vec_oprnds->quick_push (vec_oprnd);
3472 *oprnd = vec_oprnd;
3474 /* For conversion in multiple steps, continue to get operands
3475 recursively. */
3476 if (multi_step_cvt)
3477 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3481 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3482 For multi-step conversions store the resulting vectors and call the function
3483 recursively. */
3485 static void
3486 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3487 int multi_step_cvt, gimple *stmt,
3488 vec<tree> vec_dsts,
3489 gimple_stmt_iterator *gsi,
3490 slp_tree slp_node, enum tree_code code,
3491 stmt_vec_info *prev_stmt_info)
3493 unsigned int i;
3494 tree vop0, vop1, new_tmp, vec_dest;
3495 gimple *new_stmt;
3496 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3498 vec_dest = vec_dsts.pop ();
3500 for (i = 0; i < vec_oprnds->length (); i += 2)
3502 /* Create demotion operation. */
3503 vop0 = (*vec_oprnds)[i];
3504 vop1 = (*vec_oprnds)[i + 1];
3505 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3506 new_tmp = make_ssa_name (vec_dest, new_stmt);
3507 gimple_assign_set_lhs (new_stmt, new_tmp);
3508 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3510 if (multi_step_cvt)
3511 /* Store the resulting vector for next recursive call. */
3512 (*vec_oprnds)[i/2] = new_tmp;
3513 else
3515 /* This is the last step of the conversion sequence. Store the
3516 vectors in SLP_NODE or in vector info of the scalar statement
3517 (or in STMT_VINFO_RELATED_STMT chain). */
3518 if (slp_node)
3519 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3520 else
3522 if (!*prev_stmt_info)
3523 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3524 else
3525 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3527 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3532 /* For multi-step demotion operations we first generate demotion operations
3533 from the source type to the intermediate types, and then combine the
3534 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3535 type. */
3536 if (multi_step_cvt)
3538 /* At each level of recursion we have half of the operands we had at the
3539 previous level. */
3540 vec_oprnds->truncate ((i+1)/2);
3541 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3542 stmt, vec_dsts, gsi, slp_node,
3543 VEC_PACK_TRUNC_EXPR,
3544 prev_stmt_info);
3547 vec_dsts.quick_push (vec_dest);
3551 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3552 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3553 the resulting vectors and call the function recursively. */
3555 static void
3556 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3557 vec<tree> *vec_oprnds1,
3558 gimple *stmt, tree vec_dest,
3559 gimple_stmt_iterator *gsi,
3560 enum tree_code code1,
3561 enum tree_code code2, tree decl1,
3562 tree decl2, int op_type)
3564 int i;
3565 tree vop0, vop1, new_tmp1, new_tmp2;
3566 gimple *new_stmt1, *new_stmt2;
3567 vec<tree> vec_tmp = vNULL;
3569 vec_tmp.create (vec_oprnds0->length () * 2);
3570 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3572 if (op_type == binary_op)
3573 vop1 = (*vec_oprnds1)[i];
3574 else
3575 vop1 = NULL_TREE;
3577 /* Generate the two halves of promotion operation. */
3578 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3579 op_type, vec_dest, gsi, stmt);
3580 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3581 op_type, vec_dest, gsi, stmt);
3582 if (is_gimple_call (new_stmt1))
3584 new_tmp1 = gimple_call_lhs (new_stmt1);
3585 new_tmp2 = gimple_call_lhs (new_stmt2);
3587 else
3589 new_tmp1 = gimple_assign_lhs (new_stmt1);
3590 new_tmp2 = gimple_assign_lhs (new_stmt2);
3593 /* Store the results for the next step. */
3594 vec_tmp.quick_push (new_tmp1);
3595 vec_tmp.quick_push (new_tmp2);
3598 vec_oprnds0->release ();
3599 *vec_oprnds0 = vec_tmp;
3603 /* Check if STMT performs a conversion operation, that can be vectorized.
3604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3605 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3608 static bool
3609 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3610 gimple **vec_stmt, slp_tree slp_node)
3612 tree vec_dest;
3613 tree scalar_dest;
3614 tree op0, op1 = NULL_TREE;
3615 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3617 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3618 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3619 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3620 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3621 tree new_temp;
3622 gimple *def_stmt;
3623 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3624 gimple *new_stmt = NULL;
3625 stmt_vec_info prev_stmt_info;
3626 int nunits_in;
3627 int nunits_out;
3628 tree vectype_out, vectype_in;
3629 int ncopies, i, j;
3630 tree lhs_type, rhs_type;
3631 enum { NARROW, NONE, WIDEN } modifier;
3632 vec<tree> vec_oprnds0 = vNULL;
3633 vec<tree> vec_oprnds1 = vNULL;
3634 tree vop0;
3635 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3636 vec_info *vinfo = stmt_info->vinfo;
3637 int multi_step_cvt = 0;
3638 vec<tree> vec_dsts = vNULL;
3639 vec<tree> interm_types = vNULL;
3640 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3641 int op_type;
3642 machine_mode rhs_mode;
3643 unsigned short fltsz;
3645 /* Is STMT a vectorizable conversion? */
3647 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3648 return false;
3650 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3651 && ! vec_stmt)
3652 return false;
3654 if (!is_gimple_assign (stmt))
3655 return false;
3657 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3658 return false;
3660 code = gimple_assign_rhs_code (stmt);
3661 if (!CONVERT_EXPR_CODE_P (code)
3662 && code != FIX_TRUNC_EXPR
3663 && code != FLOAT_EXPR
3664 && code != WIDEN_MULT_EXPR
3665 && code != WIDEN_LSHIFT_EXPR)
3666 return false;
3668 op_type = TREE_CODE_LENGTH (code);
3670 /* Check types of lhs and rhs. */
3671 scalar_dest = gimple_assign_lhs (stmt);
3672 lhs_type = TREE_TYPE (scalar_dest);
3673 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3675 op0 = gimple_assign_rhs1 (stmt);
3676 rhs_type = TREE_TYPE (op0);
3678 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3679 && !((INTEGRAL_TYPE_P (lhs_type)
3680 && INTEGRAL_TYPE_P (rhs_type))
3681 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3682 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3683 return false;
3685 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3686 && ((INTEGRAL_TYPE_P (lhs_type)
3687 && (TYPE_PRECISION (lhs_type)
3688 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3689 || (INTEGRAL_TYPE_P (rhs_type)
3690 && (TYPE_PRECISION (rhs_type)
3691 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3695 "type conversion to/from bit-precision unsupported."
3696 "\n");
3697 return false;
3700 /* Check the operands of the operation. */
3701 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3703 if (dump_enabled_p ())
3704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3705 "use not simple.\n");
3706 return false;
3708 if (op_type == binary_op)
3710 bool ok;
3712 op1 = gimple_assign_rhs2 (stmt);
3713 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3714 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3715 OP1. */
3716 if (CONSTANT_CLASS_P (op0))
3717 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3718 else
3719 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3721 if (!ok)
3723 if (dump_enabled_p ())
3724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3725 "use not simple.\n");
3726 return false;
3730 /* If op0 is an external or constant def, use a vector type of
3731 the same size as the output vector type. */
3732 if (!vectype_in)
3733 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3734 if (vec_stmt)
3735 gcc_assert (vectype_in);
3736 if (!vectype_in)
3738 if (dump_enabled_p ())
3740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3741 "no vectype for scalar type ");
3742 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3743 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3746 return false;
3749 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3750 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3752 if (dump_enabled_p ())
3754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3755 "can't convert between boolean and non "
3756 "boolean vectors");
3757 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3758 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3761 return false;
3764 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3765 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3766 if (nunits_in < nunits_out)
3767 modifier = NARROW;
3768 else if (nunits_out == nunits_in)
3769 modifier = NONE;
3770 else
3771 modifier = WIDEN;
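/* For example, a short -> int conversion with a V8HI input vectype and a
   V4SI output vectype has nunits_in == 8 and nunits_out == 4, so it is
   classified as WIDEN (a promotion); the reverse int -> short case is
   NARROW (a demotion).  */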
3773 /* Multiple types in SLP are handled by creating the appropriate number of
3774 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3775 case of SLP. */
3776 if (slp_node || PURE_SLP_STMT (stmt_info))
3777 ncopies = 1;
3778 else if (modifier == NARROW)
3779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3780 else
3781 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3783 /* Sanity check: make sure that at least one copy of the vectorized stmt
3784 needs to be generated. */
3785 gcc_assert (ncopies >= 1);
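/* For instance, in the NONE and WIDEN cases a vectorization factor of 8
   with four elements per input vector (and no SLP) gives NCOPIES == 2,
   so the generation loops below are executed twice.  */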
3787 /* Supportable by target? */
3788 switch (modifier)
3790 case NONE:
3791 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3792 return false;
3793 if (supportable_convert_operation (code, vectype_out, vectype_in,
3794 &decl1, &code1))
3795 break;
3796 /* FALLTHRU */
3797 unsupported:
3798 if (dump_enabled_p ())
3799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3800 "conversion not supported by target.\n");
3801 return false;
3803 case WIDEN:
3804 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3805 &code1, &code2, &multi_step_cvt,
3806 &interm_types))
3808 /* A binary widening operation can only be supported directly by the
3809 architecture. */
3810 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3811 break;
3814 if (code != FLOAT_EXPR
3815 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3816 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3817 goto unsupported;
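/* A FLOAT_EXPR whose result is wider than its integer source may still be
   vectorizable via an intermediate type: e.g. short -> double can be done
   by widening short -> int with NOP_EXPR steps and then converting
   int -> double.  The loop below searches for such an intermediate mode,
   recording any extra steps in MULTI_STEP_CVT and INTERM_TYPES.  */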
3819 rhs_mode = TYPE_MODE (rhs_type);
3820 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3821 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3822 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3823 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3825 cvt_type
3826 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3827 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3828 if (cvt_type == NULL_TREE)
3829 goto unsupported;
3831 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3833 if (!supportable_convert_operation (code, vectype_out,
3834 cvt_type, &decl1, &codecvt1))
3835 goto unsupported;
3837 else if (!supportable_widening_operation (code, stmt, vectype_out,
3838 cvt_type, &codecvt1,
3839 &codecvt2, &multi_step_cvt,
3840 &interm_types))
3841 continue;
3842 else
3843 gcc_assert (multi_step_cvt == 0);
3845 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3846 vectype_in, &code1, &code2,
3847 &multi_step_cvt, &interm_types))
3848 break;
3851 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3852 goto unsupported;
3854 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3855 codecvt2 = ERROR_MARK;
3856 else
3858 multi_step_cvt++;
3859 interm_types.safe_push (cvt_type);
3860 cvt_type = NULL_TREE;
3862 break;
3864 case NARROW:
3865 gcc_assert (op_type == unary_op);
3866 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3867 &code1, &multi_step_cvt,
3868 &interm_types))
3869 break;
3871 if (code != FIX_TRUNC_EXPR
3872 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3873 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3874 goto unsupported;
3876 rhs_mode = TYPE_MODE (rhs_type);
3877 cvt_type
3878 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3879 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3880 if (cvt_type == NULL_TREE)
3881 goto unsupported;
3882 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3883 &decl1, &codecvt1))
3884 goto unsupported;
3885 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3886 &code1, &multi_step_cvt,
3887 &interm_types))
3888 break;
3889 goto unsupported;
3891 default:
3892 gcc_unreachable ();
3895 if (!vec_stmt) /* transformation not required. */
3897 if (dump_enabled_p ())
3898 dump_printf_loc (MSG_NOTE, vect_location,
3899 "=== vectorizable_conversion ===\n");
3900 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3902 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3903 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3905 else if (modifier == NARROW)
3907 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3908 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3910 else
3912 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3913 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3915 interm_types.release ();
3916 return true;
3919 /** Transform. **/
3920 if (dump_enabled_p ())
3921 dump_printf_loc (MSG_NOTE, vect_location,
3922 "transform conversion. ncopies = %d.\n", ncopies);
3924 if (op_type == binary_op)
3926 if (CONSTANT_CLASS_P (op0))
3927 op0 = fold_convert (TREE_TYPE (op1), op0);
3928 else if (CONSTANT_CLASS_P (op1))
3929 op1 = fold_convert (TREE_TYPE (op0), op1);
3932 /* In case of multi-step conversion, we first generate conversion operations
3933 to the intermediate types, and then from those types to the final one.
3934 We create vector destinations for the intermediate types (TYPES) received
3935 from supportable_*_operation, and store them in the correct order
3936 for future use in vect_create_vectorized_*_stmts (). */
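/* For a two-step promotion such as char -> short -> int, VEC_DSTS ends up
   as { int-vector dest, short-vector dest }: the creation loops below walk
   it from the highest index down, so the intermediate destination is used
   first and index 0 always holds the final type.  */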
3937 vec_dsts.create (multi_step_cvt + 1);
3938 vec_dest = vect_create_destination_var (scalar_dest,
3939 (cvt_type && modifier == WIDEN)
3940 ? cvt_type : vectype_out);
3941 vec_dsts.quick_push (vec_dest);
3943 if (multi_step_cvt)
3945 for (i = interm_types.length () - 1;
3946 interm_types.iterate (i, &intermediate_type); i--)
3948 vec_dest = vect_create_destination_var (scalar_dest,
3949 intermediate_type);
3950 vec_dsts.quick_push (vec_dest);
3954 if (cvt_type)
3955 vec_dest = vect_create_destination_var (scalar_dest,
3956 modifier == WIDEN
3957 ? vectype_out : cvt_type);
3959 if (!slp_node)
3961 if (modifier == WIDEN)
3963 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3964 if (op_type == binary_op)
3965 vec_oprnds1.create (1);
3967 else if (modifier == NARROW)
3968 vec_oprnds0.create (
3969 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3971 else if (code == WIDEN_LSHIFT_EXPR)
3972 vec_oprnds1.create (slp_node->vec_stmts_size);
3974 last_oprnd = op0;
3975 prev_stmt_info = NULL;
3976 switch (modifier)
3978 case NONE:
3979 for (j = 0; j < ncopies; j++)
3981 if (j == 0)
3982 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3983 -1);
3984 else
3985 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3987 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3989 /* Arguments are ready. Create the new vector stmt. */
3990 if (code1 == CALL_EXPR)
3992 new_stmt = gimple_build_call (decl1, 1, vop0);
3993 new_temp = make_ssa_name (vec_dest, new_stmt);
3994 gimple_call_set_lhs (new_stmt, new_temp);
3996 else
3998 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3999 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4000 new_temp = make_ssa_name (vec_dest, new_stmt);
4001 gimple_assign_set_lhs (new_stmt, new_temp);
4004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4005 if (slp_node)
4006 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4007 else
4009 if (!prev_stmt_info)
4010 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4011 else
4012 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4013 prev_stmt_info = vinfo_for_stmt (new_stmt);
4017 break;
4019 case WIDEN:
4020 /* In case the vectorization factor (VF) is bigger than the number
4021 of elements that we can fit in a vectype (nunits), we have to
4022 generate more than one vector stmt - i.e., we need to "unroll"
4023 the vector stmt by a factor VF/nunits. */
4024 for (j = 0; j < ncopies; j++)
4026 /* Handle uses. */
4027 if (j == 0)
4029 if (slp_node)
4031 if (code == WIDEN_LSHIFT_EXPR)
4033 unsigned int k;
4035 vec_oprnd1 = op1;
4036 /* Store vec_oprnd1 for every vector stmt to be created
4037 for SLP_NODE. We check during the analysis that all
4038 the shift arguments are the same. */
4039 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4040 vec_oprnds1.quick_push (vec_oprnd1);
4042 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4043 slp_node, -1);
4045 else
4046 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4047 &vec_oprnds1, slp_node, -1);
4049 else
4051 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4052 vec_oprnds0.quick_push (vec_oprnd0);
4053 if (op_type == binary_op)
4055 if (code == WIDEN_LSHIFT_EXPR)
4056 vec_oprnd1 = op1;
4057 else
4058 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4059 vec_oprnds1.quick_push (vec_oprnd1);
4063 else
4065 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4066 vec_oprnds0.truncate (0);
4067 vec_oprnds0.quick_push (vec_oprnd0);
4068 if (op_type == binary_op)
4070 if (code == WIDEN_LSHIFT_EXPR)
4071 vec_oprnd1 = op1;
4072 else
4073 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4074 vec_oprnd1);
4075 vec_oprnds1.truncate (0);
4076 vec_oprnds1.quick_push (vec_oprnd1);
4080 /* Arguments are ready. Create the new vector stmts. */
4081 for (i = multi_step_cvt; i >= 0; i--)
4083 tree this_dest = vec_dsts[i];
4084 enum tree_code c1 = code1, c2 = code2;
4085 if (i == 0 && codecvt2 != ERROR_MARK)
4087 c1 = codecvt1;
4088 c2 = codecvt2;
4090 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4091 &vec_oprnds1,
4092 stmt, this_dest, gsi,
4093 c1, c2, decl1, decl2,
4094 op_type);
4097 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4099 if (cvt_type)
4101 if (codecvt1 == CALL_EXPR)
4103 new_stmt = gimple_build_call (decl1, 1, vop0);
4104 new_temp = make_ssa_name (vec_dest, new_stmt);
4105 gimple_call_set_lhs (new_stmt, new_temp);
4107 else
4109 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4110 new_temp = make_ssa_name (vec_dest);
4111 new_stmt = gimple_build_assign (new_temp, codecvt1,
4112 vop0);
4115 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4117 else
4118 new_stmt = SSA_NAME_DEF_STMT (vop0);
4120 if (slp_node)
4121 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4122 else
4124 if (!prev_stmt_info)
4125 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4126 else
4127 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4128 prev_stmt_info = vinfo_for_stmt (new_stmt);
4133 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4134 break;
4136 case NARROW:
4137 /* In case the vectorization factor (VF) is bigger than the number
4138 of elements that we can fit in a vectype (nunits), we have to
4139 generate more than one vector stmt - i.e., we need to "unroll"
4140 the vector stmt by a factor VF/nunits. */
4141 for (j = 0; j < ncopies; j++)
4143 /* Handle uses. */
4144 if (slp_node)
4145 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4146 slp_node, -1);
4147 else
4149 vec_oprnds0.truncate (0);
4150 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4151 vect_pow2 (multi_step_cvt) - 1);
4154 /* Arguments are ready. Create the new vector stmts. */
4155 if (cvt_type)
4156 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4158 if (codecvt1 == CALL_EXPR)
4160 new_stmt = gimple_build_call (decl1, 1, vop0);
4161 new_temp = make_ssa_name (vec_dest, new_stmt);
4162 gimple_call_set_lhs (new_stmt, new_temp);
4164 else
4166 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4167 new_temp = make_ssa_name (vec_dest);
4168 new_stmt = gimple_build_assign (new_temp, codecvt1,
4169 vop0);
4172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4173 vec_oprnds0[i] = new_temp;
4176 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4177 stmt, vec_dsts, gsi,
4178 slp_node, code1,
4179 &prev_stmt_info);
4182 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4183 break;
4186 vec_oprnds0.release ();
4187 vec_oprnds1.release ();
4188 vec_dsts.release ();
4189 interm_types.release ();
4191 return true;
4195 /* Function vectorizable_assignment.
4197 Check if STMT performs an assignment (copy) that can be vectorized.
4198 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4199 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4202 static bool
4203 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4204 gimple **vec_stmt, slp_tree slp_node)
4206 tree vec_dest;
4207 tree scalar_dest;
4208 tree op;
4209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4210 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4211 tree new_temp;
4212 gimple *def_stmt;
4213 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4214 int ncopies;
4215 int i, j;
4216 vec<tree> vec_oprnds = vNULL;
4217 tree vop;
4218 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4219 vec_info *vinfo = stmt_info->vinfo;
4220 gimple *new_stmt = NULL;
4221 stmt_vec_info prev_stmt_info = NULL;
4222 enum tree_code code;
4223 tree vectype_in;
4225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4226 return false;
4228 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4229 && ! vec_stmt)
4230 return false;
4232 /* Is vectorizable assignment? */
4233 if (!is_gimple_assign (stmt))
4234 return false;
4236 scalar_dest = gimple_assign_lhs (stmt);
4237 if (TREE_CODE (scalar_dest) != SSA_NAME)
4238 return false;
4240 code = gimple_assign_rhs_code (stmt);
4241 if (gimple_assign_single_p (stmt)
4242 || code == PAREN_EXPR
4243 || CONVERT_EXPR_CODE_P (code))
4244 op = gimple_assign_rhs1 (stmt);
4245 else
4246 return false;
4248 if (code == VIEW_CONVERT_EXPR)
4249 op = TREE_OPERAND (op, 0);
4251 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4252 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4254 /* Multiple types in SLP are handled by creating the appropriate number of
4255 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4256 case of SLP. */
4257 if (slp_node || PURE_SLP_STMT (stmt_info))
4258 ncopies = 1;
4259 else
4260 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4262 gcc_assert (ncopies >= 1);
4264 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4266 if (dump_enabled_p ())
4267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4268 "use not simple.\n");
4269 return false;
4272 /* We can handle NOP_EXPR conversions that do not change the number
4273 of elements or the vector size. */
4274 if ((CONVERT_EXPR_CODE_P (code)
4275 || code == VIEW_CONVERT_EXPR)
4276 && (!vectype_in
4277 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4278 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4279 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4280 return false;
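/* E.g. a cast between "int" and "unsigned int", or a VIEW_CONVERT_EXPR
   between same-sized vector types, is treated as a plain vector copy here;
   the operand is simply wrapped in a VIEW_CONVERT_EXPR to the destination
   vector type when the copy is emitted below.  */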
4282 /* We do not handle bit-precision changes. */
4283 if ((CONVERT_EXPR_CODE_P (code)
4284 || code == VIEW_CONVERT_EXPR)
4285 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4286 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4288 || ((TYPE_PRECISION (TREE_TYPE (op))
4289 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4290 /* But a conversion that does not change the bit-pattern is ok. */
4291 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4292 > TYPE_PRECISION (TREE_TYPE (op)))
4293 && TYPE_UNSIGNED (TREE_TYPE (op)))
4294 /* Conversion between boolean types of different sizes is
4295 a simple assignment in case their vectypes are the same
4296 boolean vectors. */
4297 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4298 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4300 if (dump_enabled_p ())
4301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4302 "type conversion to/from bit-precision "
4303 "unsupported.\n");
4304 return false;
4307 if (!vec_stmt) /* transformation not required. */
4309 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4310 if (dump_enabled_p ())
4311 dump_printf_loc (MSG_NOTE, vect_location,
4312 "=== vectorizable_assignment ===\n");
4313 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4314 return true;
4317 /** Transform. **/
4318 if (dump_enabled_p ())
4319 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4321 /* Handle def. */
4322 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4324 /* Handle use. */
4325 for (j = 0; j < ncopies; j++)
4327 /* Handle uses. */
4328 if (j == 0)
4329 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4330 else
4331 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4333 /* Arguments are ready. Create the new vector stmt. */
4334 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4336 if (CONVERT_EXPR_CODE_P (code)
4337 || code == VIEW_CONVERT_EXPR)
4338 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4339 new_stmt = gimple_build_assign (vec_dest, vop);
4340 new_temp = make_ssa_name (vec_dest, new_stmt);
4341 gimple_assign_set_lhs (new_stmt, new_temp);
4342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4343 if (slp_node)
4344 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4347 if (slp_node)
4348 continue;
4350 if (j == 0)
4351 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4352 else
4353 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4355 prev_stmt_info = vinfo_for_stmt (new_stmt);
4358 vec_oprnds.release ();
4359 return true;
4363 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4364 either as a shift by a scalar or by a vector. */
4366 bool
4367 vect_supportable_shift (enum tree_code code, tree scalar_type)
4370 machine_mode vec_mode;
4371 optab optab;
4372 int icode;
4373 tree vectype;
4375 vectype = get_vectype_for_scalar_type (scalar_type);
4376 if (!vectype)
4377 return false;
4379 optab = optab_for_tree_code (code, vectype, optab_scalar);
4380 if (!optab
4381 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4383 optab = optab_for_tree_code (code, vectype, optab_vector);
4384 if (!optab
4385 || (optab_handler (optab, TYPE_MODE (vectype))
4386 == CODE_FOR_nothing))
4387 return false;
4390 vec_mode = TYPE_MODE (vectype);
4391 icode = (int) optab_handler (optab, vec_mode);
4392 if (icode == CODE_FOR_nothing)
4393 return false;
4395 return true;
4399 /* Function vectorizable_shift.
4401 Check if STMT performs a shift operation that can be vectorized.
4402 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4403 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4404 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4406 static bool
4407 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4408 gimple **vec_stmt, slp_tree slp_node)
4410 tree vec_dest;
4411 tree scalar_dest;
4412 tree op0, op1 = NULL;
4413 tree vec_oprnd1 = NULL_TREE;
4414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4415 tree vectype;
4416 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4417 enum tree_code code;
4418 machine_mode vec_mode;
4419 tree new_temp;
4420 optab optab;
4421 int icode;
4422 machine_mode optab_op2_mode;
4423 gimple *def_stmt;
4424 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4425 gimple *new_stmt = NULL;
4426 stmt_vec_info prev_stmt_info;
4427 int nunits_in;
4428 int nunits_out;
4429 tree vectype_out;
4430 tree op1_vectype;
4431 int ncopies;
4432 int j, i;
4433 vec<tree> vec_oprnds0 = vNULL;
4434 vec<tree> vec_oprnds1 = vNULL;
4435 tree vop0, vop1;
4436 unsigned int k;
4437 bool scalar_shift_arg = true;
4438 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4439 vec_info *vinfo = stmt_info->vinfo;
4440 int vf;
4442 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4443 return false;
4445 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4446 && ! vec_stmt)
4447 return false;
4449 /* Is STMT a vectorizable binary/unary operation? */
4450 if (!is_gimple_assign (stmt))
4451 return false;
4453 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4454 return false;
4456 code = gimple_assign_rhs_code (stmt);
4458 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4459 || code == RROTATE_EXPR))
4460 return false;
4462 scalar_dest = gimple_assign_lhs (stmt);
4463 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4464 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4465 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4467 if (dump_enabled_p ())
4468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4469 "bit-precision shifts not supported.\n");
4470 return false;
4473 op0 = gimple_assign_rhs1 (stmt);
4474 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4476 if (dump_enabled_p ())
4477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4478 "use not simple.\n");
4479 return false;
4481 /* If op0 is an external or constant def use a vector type with
4482 the same size as the output vector type. */
4483 if (!vectype)
4484 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4485 if (vec_stmt)
4486 gcc_assert (vectype);
4487 if (!vectype)
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4491 "no vectype for scalar type\n");
4492 return false;
4495 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4496 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4497 if (nunits_out != nunits_in)
4498 return false;
4500 op1 = gimple_assign_rhs2 (stmt);
4501 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4503 if (dump_enabled_p ())
4504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4505 "use not simple.\n");
4506 return false;
4509 if (loop_vinfo)
4510 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4511 else
4512 vf = 1;
4514 /* Multiple types in SLP are handled by creating the appropriate number of
4515 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4516 case of SLP. */
4517 if (slp_node || PURE_SLP_STMT (stmt_info))
4518 ncopies = 1;
4519 else
4520 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4522 gcc_assert (ncopies >= 1);
4524 /* Determine whether the shift amount is a vector or a scalar. If the
4525 shift/rotate amount is a vector, use the vector/vector shift optabs. */
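/* For instance, "x[i] << 3" or "x[i] << n" with loop-invariant N uses a
   scalar shift amount and can use the vector-by-scalar optab, whereas
   "x[i] << y[i]" needs the vector-by-vector optab.  */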
4527 if ((dt[1] == vect_internal_def
4528 || dt[1] == vect_induction_def)
4529 && !slp_node)
4530 scalar_shift_arg = false;
4531 else if (dt[1] == vect_constant_def
4532 || dt[1] == vect_external_def
4533 || dt[1] == vect_internal_def)
4535 /* In SLP, we need to check whether the shift count is the same in
4536 all the stmts of the node; in loops, if it is a constant or
4537 invariant, it is always a scalar shift. */
4538 if (slp_node)
4540 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4541 gimple *slpstmt;
4543 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4544 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4545 scalar_shift_arg = false;
4548 /* If the shift amount is computed by a pattern stmt we cannot
4549 use the scalar amount directly, so give up and use a vector
4550 shift. */
4551 if (dt[1] == vect_internal_def)
4553 gimple *def = SSA_NAME_DEF_STMT (op1);
4554 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4555 scalar_shift_arg = false;
4558 else
4560 if (dump_enabled_p ())
4561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4562 "operand mode requires invariant argument.\n");
4563 return false;
4566 /* Vector shifted by vector. */
4567 if (!scalar_shift_arg)
4569 optab = optab_for_tree_code (code, vectype, optab_vector);
4570 if (dump_enabled_p ())
4571 dump_printf_loc (MSG_NOTE, vect_location,
4572 "vector/vector shift/rotate found.\n");
4574 if (!op1_vectype)
4575 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4576 if (op1_vectype == NULL_TREE
4577 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4579 if (dump_enabled_p ())
4580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4581 "unusable type for last operand in"
4582 " vector/vector shift/rotate.\n");
4583 return false;
4586 /* See if the machine has a vector shifted by scalar insn, and if not,
4587 then see if it has a vector shifted by vector insn. */
4588 else
4590 optab = optab_for_tree_code (code, vectype, optab_scalar);
4591 if (optab
4592 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4594 if (dump_enabled_p ())
4595 dump_printf_loc (MSG_NOTE, vect_location,
4596 "vector/scalar shift/rotate found.\n");
4598 else
4600 optab = optab_for_tree_code (code, vectype, optab_vector);
4601 if (optab
4602 && (optab_handler (optab, TYPE_MODE (vectype))
4603 != CODE_FOR_nothing))
4605 scalar_shift_arg = false;
4607 if (dump_enabled_p ())
4608 dump_printf_loc (MSG_NOTE, vect_location,
4609 "vector/vector shift/rotate found.\n");
4611 /* Unlike the other binary operators, shifts/rotates have
4612 the rhs being int, instead of the same type as the lhs,
4613 so make sure the scalar is the right type if we are
4614 dealing with vectors of long long/long/short/char. */
4615 if (dt[1] == vect_constant_def)
4616 op1 = fold_convert (TREE_TYPE (vectype), op1);
4617 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4618 TREE_TYPE (op1)))
4620 if (slp_node
4621 && TYPE_MODE (TREE_TYPE (vectype))
4622 != TYPE_MODE (TREE_TYPE (op1)))
4624 if (dump_enabled_p ())
4625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4626 "unusable type for last operand in"
4627 " vector/vector shift/rotate.\n");
4628 return false;
4630 if (vec_stmt && !slp_node)
4632 op1 = fold_convert (TREE_TYPE (vectype), op1);
4633 op1 = vect_init_vector (stmt, op1,
4634 TREE_TYPE (vectype), NULL);
4641 /* Supportable by target? */
4642 if (!optab)
4644 if (dump_enabled_p ())
4645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4646 "no optab.\n");
4647 return false;
4649 vec_mode = TYPE_MODE (vectype);
4650 icode = (int) optab_handler (optab, vec_mode);
4651 if (icode == CODE_FOR_nothing)
4653 if (dump_enabled_p ())
4654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4655 "op not supported by target.\n");
4656 /* Check only during analysis. */
4657 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4658 || (vf < vect_min_worthwhile_factor (code)
4659 && !vec_stmt))
4660 return false;
4661 if (dump_enabled_p ())
4662 dump_printf_loc (MSG_NOTE, vect_location,
4663 "proceeding using word mode.\n");
4666 /* Worthwhile without SIMD support? Check only during analysis. */
4667 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4668 && vf < vect_min_worthwhile_factor (code)
4669 && !vec_stmt)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "not worthwhile without SIMD support.\n");
4674 return false;
4677 if (!vec_stmt) /* transformation not required. */
4679 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4680 if (dump_enabled_p ())
4681 dump_printf_loc (MSG_NOTE, vect_location,
4682 "=== vectorizable_shift ===\n");
4683 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4684 return true;
4687 /** Transform. **/
4689 if (dump_enabled_p ())
4690 dump_printf_loc (MSG_NOTE, vect_location,
4691 "transform binary/unary operation.\n");
4693 /* Handle def. */
4694 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4696 prev_stmt_info = NULL;
4697 for (j = 0; j < ncopies; j++)
4699 /* Handle uses. */
4700 if (j == 0)
4702 if (scalar_shift_arg)
4704 /* Vector shl and shr insn patterns can be defined with scalar
4705 operand 2 (shift operand). In this case, use constant or loop
4706 invariant op1 directly, without extending it to vector mode
4707 first. */
4708 optab_op2_mode = insn_data[icode].operand[2].mode;
4709 if (!VECTOR_MODE_P (optab_op2_mode))
4711 if (dump_enabled_p ())
4712 dump_printf_loc (MSG_NOTE, vect_location,
4713 "operand 1 using scalar mode.\n");
4714 vec_oprnd1 = op1;
4715 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4716 vec_oprnds1.quick_push (vec_oprnd1);
4717 if (slp_node)
4719 /* Store vec_oprnd1 for every vector stmt to be created
4720 for SLP_NODE. We check during the analysis that all
4721 the shift arguments are the same.
4722 TODO: Allow different constants for different vector
4723 stmts generated for an SLP instance. */
4724 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4725 vec_oprnds1.quick_push (vec_oprnd1);
4730 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4731 (a special case for certain kinds of vector shifts); otherwise,
4732 operand 1 should be of a vector type (the usual case). */
4733 if (vec_oprnd1)
4734 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4735 slp_node, -1);
4736 else
4737 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4738 slp_node, -1);
4740 else
4741 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4743 /* Arguments are ready. Create the new vector stmt. */
4744 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4746 vop1 = vec_oprnds1[i];
4747 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4748 new_temp = make_ssa_name (vec_dest, new_stmt);
4749 gimple_assign_set_lhs (new_stmt, new_temp);
4750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4751 if (slp_node)
4752 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4755 if (slp_node)
4756 continue;
4758 if (j == 0)
4759 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4760 else
4761 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4762 prev_stmt_info = vinfo_for_stmt (new_stmt);
4765 vec_oprnds0.release ();
4766 vec_oprnds1.release ();
4768 return true;
4772 /* Function vectorizable_operation.
4774 Check if STMT performs a binary, unary or ternary operation that can
4775 be vectorized.
4776 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4777 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4778 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
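/* Typical examples are "z_i = x_i + y_i", "z_i = x_i & y_i" or the unary
   "z_i = -x_i".  Shifts and rotates are rejected here and handled by
   vectorizable_shift instead.  */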
4780 static bool
4781 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4782 gimple **vec_stmt, slp_tree slp_node)
4784 tree vec_dest;
4785 tree scalar_dest;
4786 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4788 tree vectype;
4789 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4790 enum tree_code code;
4791 machine_mode vec_mode;
4792 tree new_temp;
4793 int op_type;
4794 optab optab;
4795 bool target_support_p;
4796 gimple *def_stmt;
4797 enum vect_def_type dt[3]
4798 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4799 gimple *new_stmt = NULL;
4800 stmt_vec_info prev_stmt_info;
4801 int nunits_in;
4802 int nunits_out;
4803 tree vectype_out;
4804 int ncopies;
4805 int j, i;
4806 vec<tree> vec_oprnds0 = vNULL;
4807 vec<tree> vec_oprnds1 = vNULL;
4808 vec<tree> vec_oprnds2 = vNULL;
4809 tree vop0, vop1, vop2;
4810 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4811 vec_info *vinfo = stmt_info->vinfo;
4812 int vf;
4814 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4815 return false;
4817 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4818 && ! vec_stmt)
4819 return false;
4821 /* Is STMT a vectorizable binary/unary operation? */
4822 if (!is_gimple_assign (stmt))
4823 return false;
4825 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4826 return false;
4828 code = gimple_assign_rhs_code (stmt);
4830 /* For pointer addition, we should use the normal plus for
4831 the vector addition. */
4832 if (code == POINTER_PLUS_EXPR)
4833 code = PLUS_EXPR;
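/* Vectors of pointers are added with the same optab as vectors of
   integers of the same width, so the POINTER_PLUS_EXPR is simply treated
   as a PLUS_EXPR from here on.  */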
4835 /* Support only unary, binary or ternary operations. */
4836 op_type = TREE_CODE_LENGTH (code);
4837 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4839 if (dump_enabled_p ())
4840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4841 "num. args = %d (not unary/binary/ternary op).\n",
4842 op_type);
4843 return false;
4846 scalar_dest = gimple_assign_lhs (stmt);
4847 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4849 /* Most operations cannot handle bit-precision types without extra
4850 truncations. */
4851 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4852 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4853 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4854 /* The exceptions are bitwise binary operations. */
4855 && code != BIT_IOR_EXPR
4856 && code != BIT_XOR_EXPR
4857 && code != BIT_AND_EXPR)
4859 if (dump_enabled_p ())
4860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4861 "bit-precision arithmetic not supported.\n");
4862 return false;
4865 op0 = gimple_assign_rhs1 (stmt);
4866 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4868 if (dump_enabled_p ())
4869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4870 "use not simple.\n");
4871 return false;
4873 /* If op0 is an external or constant def use a vector type with
4874 the same size as the output vector type. */
4875 if (!vectype)
4877 /* For boolean type we cannot determine vectype by
4878 invariant value (don't know whether it is a vector
4879 of booleans or vector of integers). We use output
4880 vectype because operations on boolean don't change
4881 type. */
4882 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4884 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4886 if (dump_enabled_p ())
4887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4888 "not supported operation on bool value.\n");
4889 return false;
4891 vectype = vectype_out;
4893 else
4894 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4896 if (vec_stmt)
4897 gcc_assert (vectype);
4898 if (!vectype)
4900 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 "no vectype for scalar type ");
4904 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4905 TREE_TYPE (op0));
4906 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4909 return false;
4912 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4913 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4914 if (nunits_out != nunits_in)
4915 return false;
4917 if (op_type == binary_op || op_type == ternary_op)
4919 op1 = gimple_assign_rhs2 (stmt);
4920 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4922 if (dump_enabled_p ())
4923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4924 "use not simple.\n");
4925 return false;
4928 if (op_type == ternary_op)
4930 op2 = gimple_assign_rhs3 (stmt);
4931 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4933 if (dump_enabled_p ())
4934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4935 "use not simple.\n");
4936 return false;
4940 if (loop_vinfo)
4941 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4942 else
4943 vf = 1;
4945 /* Multiple types in SLP are handled by creating the appropriate number of
4946 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4947 case of SLP. */
4948 if (slp_node || PURE_SLP_STMT (stmt_info))
4949 ncopies = 1;
4950 else
4951 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4953 gcc_assert (ncopies >= 1);
4955 /* Shifts are handled in vectorizable_shift (). */
4956 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4957 || code == RROTATE_EXPR)
4958 return false;
4960 /* Supportable by target? */
4962 vec_mode = TYPE_MODE (vectype);
4963 if (code == MULT_HIGHPART_EXPR)
4964 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4965 else
4967 optab = optab_for_tree_code (code, vectype, optab_default);
4968 if (!optab)
4970 if (dump_enabled_p ())
4971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4972 "no optab.\n");
4973 return false;
4975 target_support_p = (optab_handler (optab, vec_mode)
4976 != CODE_FOR_nothing);
4979 if (!target_support_p)
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4983 "op not supported by target.\n");
4984 /* Check only during analysis. */
4985 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4986 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4987 return false;
4988 if (dump_enabled_p ())
4989 dump_printf_loc (MSG_NOTE, vect_location,
4990 "proceeding using word mode.\n");
4993 /* Worthwhile without SIMD support? Check only during analysis. */
4994 if (!VECTOR_MODE_P (vec_mode)
4995 && !vec_stmt
4996 && vf < vect_min_worthwhile_factor (code))
4998 if (dump_enabled_p ())
4999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5000 "not worthwhile without SIMD support.\n");
5001 return false;
5004 if (!vec_stmt) /* transformation not required. */
5006 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5007 if (dump_enabled_p ())
5008 dump_printf_loc (MSG_NOTE, vect_location,
5009 "=== vectorizable_operation ===\n");
5010 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5011 return true;
5014 /** Transform. **/
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE, vect_location,
5018 "transform binary/unary operation.\n");
5020 /* Handle def. */
5021 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5023 /* In case the vectorization factor (VF) is bigger than the number
5024 of elements that we can fit in a vectype (nunits), we have to generate
5025 more than one vector stmt - i.e., we need to "unroll" the
5026 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5027 from one copy of the vector stmt to the next, in the field
5028 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5029 stages to find the correct vector defs to be used when vectorizing
5030 stmts that use the defs of the current stmt. The example below
5031 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5032 we need to create 4 vectorized stmts):
5034 before vectorization:
5035 RELATED_STMT VEC_STMT
5036 S1: x = memref - -
5037 S2: z = x + 1 - -
5039 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5040 there):
5041 RELATED_STMT VEC_STMT
5042 VS1_0: vx0 = memref0 VS1_1 -
5043 VS1_1: vx1 = memref1 VS1_2 -
5044 VS1_2: vx2 = memref2 VS1_3 -
5045 VS1_3: vx3 = memref3 - -
5046 S1: x = load - VS1_0
5047 S2: z = x + 1 - -
5049 step2: vectorize stmt S2 (done here):
5050 To vectorize stmt S2 we first need to find the relevant vector
5051 def for the first operand 'x'. This is, as usual, obtained from
5052 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5053 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5054 relevant vector def 'vx0'. Having found 'vx0' we can generate
5055 the vector stmt VS2_0, and as usual, record it in the
5056 STMT_VINFO_VEC_STMT of stmt S2.
5057 When creating the second copy (VS2_1), we obtain the relevant vector
5058 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5059 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5060 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5061 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5062 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5063 chain of stmts and pointers:
5064 RELATED_STMT VEC_STMT
5065 VS1_0: vx0 = memref0 VS1_1 -
5066 VS1_1: vx1 = memref1 VS1_2 -
5067 VS1_2: vx2 = memref2 VS1_3 -
5068 VS1_3: vx3 = memref3 - -
5069 S1: x = load - VS1_0
5070 VS2_0: vz0 = vx0 + v1 VS2_1 -
5071 VS2_1: vz1 = vx1 + v1 VS2_2 -
5072 VS2_2: vz2 = vx2 + v1 VS2_3 -
5073 VS2_3: vz3 = vx3 + v1 - -
5074 S2: z = x + 1 - VS2_0 */
5076 prev_stmt_info = NULL;
5077 for (j = 0; j < ncopies; j++)
5079 /* Handle uses. */
5080 if (j == 0)
5082 if (op_type == binary_op || op_type == ternary_op)
5083 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5084 slp_node, -1);
5085 else
5086 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5087 slp_node, -1);
5088 if (op_type == ternary_op)
5089 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5090 slp_node, -1);
5092 else
5094 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5095 if (op_type == ternary_op)
5097 tree vec_oprnd = vec_oprnds2.pop ();
5098 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5099 vec_oprnd));
5103 /* Arguments are ready. Create the new vector stmt. */
5104 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5106 vop1 = ((op_type == binary_op || op_type == ternary_op)
5107 ? vec_oprnds1[i] : NULL_TREE);
5108 vop2 = ((op_type == ternary_op)
5109 ? vec_oprnds2[i] : NULL_TREE);
5110 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5111 new_temp = make_ssa_name (vec_dest, new_stmt);
5112 gimple_assign_set_lhs (new_stmt, new_temp);
5113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5114 if (slp_node)
5115 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5118 if (slp_node)
5119 continue;
5121 if (j == 0)
5122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5123 else
5124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5125 prev_stmt_info = vinfo_for_stmt (new_stmt);
5128 vec_oprnds0.release ();
5129 vec_oprnds1.release ();
5130 vec_oprnds2.release ();
5132 return true;
5135 /* A helper function to ensure data reference DR's base alignment
5136 for STMT_INFO. */
5138 static void
5139 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5141 if (!dr->aux)
5142 return;
5144 if (DR_VECT_AUX (dr)->base_misaligned)
5146 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5147 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5149 if (decl_in_symtab_p (base_decl))
5150 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5151 else
5153 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5154 DECL_USER_ALIGN (base_decl) = 1;
5156 DR_VECT_AUX (dr)->base_misaligned = false;
5161 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
5162 reversal of the vector elements. If that is impossible to do,
5163 return NULL. */
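/* For a four-element vector the returned mask selects elements
   { 3, 2, 1, 0 }, so a VEC_PERM_EXPR with this mask reverses the vector;
   it is used below when vectorizing negative-step accesses.  */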
5165 static tree
5166 perm_mask_for_reverse (tree vectype)
5168 int i, nunits;
5169 unsigned char *sel;
5171 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5172 sel = XALLOCAVEC (unsigned char, nunits);
5174 for (i = 0; i < nunits; ++i)
5175 sel[i] = nunits - 1 - i;
5177 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5178 return NULL_TREE;
5179 return vect_gen_perm_mask_checked (vectype, sel);
5182 /* Function vectorizable_store.
5184 /* Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5185 can be vectorized.
5186 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5187 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5188 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
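/* This covers plain contiguous stores such as "a[i] = x" as well as
   grouped (interleaved), strided and scatter stores, each of which is
   handled by a separate branch below.  */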
5190 static bool
5191 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5192 slp_tree slp_node)
5194 tree scalar_dest;
5195 tree data_ref;
5196 tree op;
5197 tree vec_oprnd = NULL_TREE;
5198 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5199 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5200 tree elem_type;
5201 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5202 struct loop *loop = NULL;
5203 machine_mode vec_mode;
5204 tree dummy;
5205 enum dr_alignment_support alignment_support_scheme;
5206 gimple *def_stmt;
5207 enum vect_def_type dt;
5208 stmt_vec_info prev_stmt_info = NULL;
5209 tree dataref_ptr = NULL_TREE;
5210 tree dataref_offset = NULL_TREE;
5211 gimple *ptr_incr = NULL;
5212 int ncopies;
5213 int j;
5214 gimple *next_stmt, *first_stmt = NULL;
5215 bool grouped_store = false;
5216 bool store_lanes_p = false;
5217 unsigned int group_size, i;
5218 vec<tree> dr_chain = vNULL;
5219 vec<tree> oprnds = vNULL;
5220 vec<tree> result_chain = vNULL;
5221 bool inv_p;
5222 bool negative = false;
5223 tree offset = NULL_TREE;
5224 vec<tree> vec_oprnds = vNULL;
5225 bool slp = (slp_node != NULL);
5226 unsigned int vec_num;
5227 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5228 vec_info *vinfo = stmt_info->vinfo;
5229 tree aggr_type;
5230 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5231 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5232 int scatter_scale = 1;
5233 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5234 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5235 gimple *new_stmt;
5237 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5238 return false;
5240 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5241 && ! vec_stmt)
5242 return false;
5244 /* Is vectorizable store? */
5246 if (!is_gimple_assign (stmt))
5247 return false;
5249 scalar_dest = gimple_assign_lhs (stmt);
5250 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5251 && is_pattern_stmt_p (stmt_info))
5252 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5253 if (TREE_CODE (scalar_dest) != ARRAY_REF
5254 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5255 && TREE_CODE (scalar_dest) != INDIRECT_REF
5256 && TREE_CODE (scalar_dest) != COMPONENT_REF
5257 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5258 && TREE_CODE (scalar_dest) != REALPART_EXPR
5259 && TREE_CODE (scalar_dest) != MEM_REF)
5260 return false;
5262 gcc_assert (gimple_assign_single_p (stmt));
5264 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5265 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5267 if (loop_vinfo)
5268 loop = LOOP_VINFO_LOOP (loop_vinfo);
5270 /* Multiple types in SLP are handled by creating the appropriate number of
5271 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5272 case of SLP. */
5273 if (slp || PURE_SLP_STMT (stmt_info))
5274 ncopies = 1;
5275 else
5276 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5278 gcc_assert (ncopies >= 1);
5280 /* FORNOW. This restriction should be relaxed. */
5281 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5285 "multiple types in nested loop.\n");
5286 return false;
5289 op = gimple_assign_rhs1 (stmt);
5291 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5293 if (dump_enabled_p ())
5294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5295 "use not simple.\n");
5296 return false;
5299 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5300 return false;
5302 elem_type = TREE_TYPE (vectype);
5303 vec_mode = TYPE_MODE (vectype);
5305 /* FORNOW. In some cases we can vectorize even if the data-type is not
5306 supported (e.g. array initialization with 0). */
5307 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5308 return false;
5310 if (!STMT_VINFO_DATA_REF (stmt_info))
5311 return false;
5313 if (!STMT_VINFO_STRIDED_P (stmt_info))
5315 negative =
5316 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5317 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5318 size_zero_node) < 0;
5319 if (negative && ncopies > 1)
5321 if (dump_enabled_p ())
5322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5323 "multiple types with negative step.\n");
5324 return false;
5326 if (negative)
5328 gcc_assert (!grouped_store);
5329 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5330 if (alignment_support_scheme != dr_aligned
5331 && alignment_support_scheme != dr_unaligned_supported)
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "negative step but alignment required.\n");
5336 return false;
5338 if (dt != vect_constant_def
5339 && dt != vect_external_def
5340 && !perm_mask_for_reverse (vectype))
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "negative step and reversing not supported.\n");
5345 return false;
5350 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5352 grouped_store = true;
5353 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5354 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5355 if (!slp
5356 && !PURE_SLP_STMT (stmt_info)
5357 && !STMT_VINFO_STRIDED_P (stmt_info))
5359 if (vect_store_lanes_supported (vectype, group_size))
5360 store_lanes_p = true;
5361 else if (!vect_grouped_store_supported (vectype, group_size))
5362 return false;
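/* vect_store_lanes_supported checks for a vec_store_lanes optab entry
   (e.g. AArch64 ST2/ST3/ST4 style instructions); failing that, the group
   must be storable through the interleaving permutations generated by
   vect_permute_store_chain.  */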
5365 if (STMT_VINFO_STRIDED_P (stmt_info)
5366 && (slp || PURE_SLP_STMT (stmt_info))
5367 && (group_size > nunits
5368 || nunits % group_size != 0))
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5371 "unhandled strided group store\n");
5372 return false;
5375 if (first_stmt == stmt)
5377 /* STMT is the leader of the group. Check the operands of all the
5378 stmts of the group. */
5379 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5380 while (next_stmt)
5382 gcc_assert (gimple_assign_single_p (next_stmt));
5383 op = gimple_assign_rhs1 (next_stmt);
5384 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5386 if (dump_enabled_p ())
5387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5388 "use not simple.\n");
5389 return false;
5391 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5396 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5398 gimple *def_stmt;
5399 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5400 &scatter_off, &scatter_scale);
5401 gcc_assert (scatter_decl);
5402 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5403 &scatter_off_vectype))
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5407 "scatter index use not simple.");
5408 return false;
5412 if (!vec_stmt) /* transformation not required. */
5414 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5415 /* The SLP costs are calculated during SLP analysis. */
5416 if (!PURE_SLP_STMT (stmt_info))
5417 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5418 NULL, NULL, NULL);
5419 return true;
5422 /** Transform. **/
5424 ensure_base_align (stmt_info, dr);
5426 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5428 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5429 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5430 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5431 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5432 edge pe = loop_preheader_edge (loop);
5433 gimple_seq seq;
5434 basic_block new_bb;
5435 enum { NARROW, NONE, WIDEN } modifier;
5436 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5438 if (nunits == (unsigned int) scatter_off_nunits)
5439 modifier = NONE;
5440 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5442 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5443 modifier = WIDEN;
5445 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5446 sel[i] = i | nunits;
5448 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5449 gcc_assert (perm_mask != NULL_TREE);
5451 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5453 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5454 modifier = NARROW;
5456 for (i = 0; i < (unsigned int) nunits; ++i)
5457 sel[i] = i | scatter_off_nunits;
5459 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5460 gcc_assert (perm_mask != NULL_TREE);
5461 ncopies *= 2;
5463 else
5464 gcc_unreachable ();
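/* For example, scattering a V4DF value with a V8SI index vector gives
   nunits == 4 and scatter_off_nunits == 8, so MODIFIER is WIDEN and
   PERM_MASK selects the high half of the index vector for the
   odd-numbered copies.  */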
5466 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5467 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5468 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5469 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5470 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5471 scaletype = TREE_VALUE (arglist);
5473 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5474 && TREE_CODE (rettype) == VOID_TYPE);
5476 ptr = fold_convert (ptrtype, scatter_base);
5477 if (!is_gimple_min_invariant (ptr))
5479 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5480 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5481 gcc_assert (!new_bb);
5484 /* Currently we support only unconditional scatter stores,
5485 so mask should be all ones. */
5486 mask = build_int_cst (masktype, -1);
5487 mask = vect_init_vector (stmt, mask, masktype, NULL);
5489 scale = build_int_cst (scaletype, scatter_scale);
5491 prev_stmt_info = NULL;
5492 for (j = 0; j < ncopies; ++j)
5494 if (j == 0)
5496 src = vec_oprnd1
5497 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5498 op = vec_oprnd0
5499 = vect_get_vec_def_for_operand (scatter_off, stmt);
5501 else if (modifier != NONE && (j & 1))
5503 if (modifier == WIDEN)
5505 src = vec_oprnd1
5506 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5507 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5508 stmt, gsi);
5510 else if (modifier == NARROW)
5512 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5513 stmt, gsi);
5514 op = vec_oprnd0
5515 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5517 else
5518 gcc_unreachable ();
5520 else
5522 src = vec_oprnd1
5523 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5524 op = vec_oprnd0
5525 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5528 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5530 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5531 == TYPE_VECTOR_SUBPARTS (srctype));
5532 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5533 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5534 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5535 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5536 src = var;
5539 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5541 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5542 == TYPE_VECTOR_SUBPARTS (idxtype));
5543 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5544 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5545 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5546 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5547 op = var;
5550 new_stmt
5551 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5555 if (prev_stmt_info == NULL)
5556 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5557 else
5558 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5559 prev_stmt_info = vinfo_for_stmt (new_stmt);
5561 return true;
5564 if (grouped_store)
5566 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5567 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5569 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5571 /* FORNOW */
5572 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5574 /* We vectorize all the stmts of the interleaving group when we
5575 reach the last stmt in the group. */
5576 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5577 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5578 && !slp)
5580 *vec_stmt = NULL;
5581 return true;
5584 if (slp)
5586 grouped_store = false;
5587 /* VEC_NUM is the number of vect stmts to be created for this
5588 group. */
5589 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5590 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5591 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5592 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5593 op = gimple_assign_rhs1 (first_stmt);
5595 else
5596 /* VEC_NUM is the number of vect stmts to be created for this
5597 group. */
5598 vec_num = group_size;
5600 else
5602 first_stmt = stmt;
5603 first_dr = dr;
5604 group_size = vec_num = 1;
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_NOTE, vect_location,
5609 "transform store. ncopies = %d\n", ncopies);
5611 if (STMT_VINFO_STRIDED_P (stmt_info))
5613 gimple_stmt_iterator incr_gsi;
5614 bool insert_after;
5615 gimple *incr;
5616 tree offvar;
5617 tree ivstep;
5618 tree running_off;
5619 gimple_seq stmts = NULL;
5620 tree stride_base, stride_step, alias_off;
5621 tree vec_oprnd;
5622 unsigned int g;
5624 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5626 stride_base
5627 = fold_build_pointer_plus
5628 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5629 size_binop (PLUS_EXPR,
5630 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5631 convert_to_ptrofftype (DR_INIT (first_dr))));
5632 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5634 /* For a store with loop-invariant (but other than power-of-2)
5635 stride (i.e. not a grouped access) like so:
5637 for (i = 0; i < n; i += stride)
5638 array[i] = ...;
5640 we generate a new induction variable and new stores from
5641 the components of the (vectorized) rhs:
5643 for (j = 0; ; j += VF*stride)
5644 vectemp = ...;
5645 tmp1 = vectemp[0];
5646 array[j] = tmp1;
5647 tmp2 = vectemp[1];
5648 array[j + stride] = tmp2;
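   For example (a sketch of the sequence generated below), with a
   four-element vectype and byte step S = DR_STEP, each copy of the
   vectorized rhs expands into four scalar stores at offsets 0, S,
   2*S and 3*S from the running pointer, and the induction variable
   created below advances by ncopies * nstores * S per vector
   iteration.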
5652 unsigned nstores = nunits;
5653 tree ltype = elem_type;
5654 if (slp)
5656 nstores = nunits / group_size;
5657 if (group_size < nunits)
5658 ltype = build_vector_type (elem_type, group_size);
5659 else
5660 ltype = vectype;
5661 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5662 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5663 group_size = 1;
5666 ivstep = stride_step;
5667 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5668 build_int_cst (TREE_TYPE (ivstep),
5669 ncopies * nstores));
5671 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5673 create_iv (stride_base, ivstep, NULL,
5674 loop, &incr_gsi, insert_after,
5675 &offvar, NULL);
5676 incr = gsi_stmt (incr_gsi);
5677 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5679 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5680 if (stmts)
5681 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5683 prev_stmt_info = NULL;
5684 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5685 next_stmt = first_stmt;
5686 for (g = 0; g < group_size; g++)
5688 running_off = offvar;
5689 if (g)
5691 tree size = TYPE_SIZE_UNIT (ltype);
5692 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5693 size);
5694 tree newoff = copy_ssa_name (running_off, NULL);
5695 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5696 running_off, pos);
5697 vect_finish_stmt_generation (stmt, incr, gsi);
5698 running_off = newoff;
5700 for (j = 0; j < ncopies; j++)
5702 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5703 and first_stmt == stmt. */
5704 if (j == 0)
5706 if (slp)
5708 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5709 slp_node, -1);
5710 vec_oprnd = vec_oprnds[0];
5712 else
5714 gcc_assert (gimple_assign_single_p (next_stmt));
5715 op = gimple_assign_rhs1 (next_stmt);
5716 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5719 else
5721 if (slp)
5722 vec_oprnd = vec_oprnds[j];
5723 else
5725 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5726 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5730 for (i = 0; i < nstores; i++)
5732 tree newref, newoff;
5733 gimple *incr, *assign;
5734 tree size = TYPE_SIZE (ltype);
5735 /* Extract the i'th component. */
5736 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5737 bitsize_int (i), size);
5738 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5739 size, pos);
5741 elem = force_gimple_operand_gsi (gsi, elem, true,
5742 NULL_TREE, true,
5743 GSI_SAME_STMT);
5745 newref = build2 (MEM_REF, ltype,
5746 running_off, alias_off);
5748 /* And store it to *running_off. */
5749 assign = gimple_build_assign (newref, elem);
5750 vect_finish_stmt_generation (stmt, assign, gsi);
5752 newoff = copy_ssa_name (running_off, NULL);
5753 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5754 running_off, stride_step);
5755 vect_finish_stmt_generation (stmt, incr, gsi);
5757 running_off = newoff;
5758 if (g == group_size - 1
5759 && !slp)
5761 if (j == 0 && i == 0)
5762 STMT_VINFO_VEC_STMT (stmt_info)
5763 = *vec_stmt = assign;
5764 else
5765 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5766 prev_stmt_info = vinfo_for_stmt (assign);
5770 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5772 return true;
5775 dr_chain.create (group_size);
5776 oprnds.create (group_size);
5778 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5779 gcc_assert (alignment_support_scheme);
5780 /* Targets with store-lane instructions must not require explicit
5781 realignment. */
5782 gcc_assert (!store_lanes_p
5783 || alignment_support_scheme == dr_aligned
5784 || alignment_support_scheme == dr_unaligned_supported);
5786 if (negative)
5787 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5789 if (store_lanes_p)
5790 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5791 else
5792 aggr_type = vectype;
5794 /* In case the vectorization factor (VF) is bigger than the number
5795 of elements that we can fit in a vectype (nunits), we have to generate
5796 more than one vector stmt - i.e - we need to "unroll" the
5797 vector stmt by a factor VF/nunits. For more details see documentation in
5798 vect_get_vec_def_for_copy_stmt. */
5800 /* In case of interleaving (non-unit grouped access):
5802 S1: &base + 2 = x2
5803 S2: &base = x0
5804 S3: &base + 1 = x1
5805 S4: &base + 3 = x3
5807 We create vectorized stores starting from the base address (the access of
5808 the first stmt in the chain, S2 in the above example) when the last store
5809 stmt of the chain (S4) is reached:
5811 VS1: &base = vx2
5812 VS2: &base + vec_size*1 = vx0
5813 VS3: &base + vec_size*2 = vx1
5814 VS4: &base + vec_size*3 = vx3
5816 Then permutation statements are generated:
5818 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5819 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5822 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5823 (the order of the data-refs in the output of vect_permute_store_chain
5824 corresponds to the order of scalar stmts in the interleaving chain - see
5825 the documentation of vect_permute_store_chain()).
5827 In case of both multiple types and interleaving, the above vector stores and
5828 permutation stmts are created for every copy. The result vector stmts are
5829 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5830 STMT_VINFO_RELATED_STMT for the next copies.
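   (In the VEC_PERM_EXPRs above each source vector has eight elements;
   selector values 0-7 pick lanes from the first operand and 8-15 from
   the second, so VS5 and VS6 interleave the lanes of the two source
   vectors into the memory order of the scalar stores.)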
5833 prev_stmt_info = NULL;
5834 for (j = 0; j < ncopies; j++)
5837 if (j == 0)
5839 if (slp)
5841 /* Get vectorized arguments for SLP_NODE. */
5842 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5843 NULL, slp_node, -1);
5845 vec_oprnd = vec_oprnds[0];
5847 else
5849 /* For interleaved stores we collect vectorized defs for all the
5850 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5851 used as an input to vect_permute_store_chain(), and OPRNDS as
5852 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5854 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5855 OPRNDS are of size 1. */
5856 next_stmt = first_stmt;
5857 for (i = 0; i < group_size; i++)
5859 /* Since gaps are not supported for interleaved stores,
5860 GROUP_SIZE is the exact number of stmts in the chain.
5861 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5862 there is no interleaving, GROUP_SIZE is 1, and only one
5863 iteration of the loop will be executed. */
5864 gcc_assert (next_stmt
5865 && gimple_assign_single_p (next_stmt));
5866 op = gimple_assign_rhs1 (next_stmt);
5868 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5869 dr_chain.quick_push (vec_oprnd);
5870 oprnds.quick_push (vec_oprnd);
5871 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5875 /* We should have caught mismatched types earlier. */
5876 gcc_assert (useless_type_conversion_p (vectype,
5877 TREE_TYPE (vec_oprnd)));
5878 bool simd_lane_access_p
5879 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5880 if (simd_lane_access_p
5881 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5882 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5883 && integer_zerop (DR_OFFSET (first_dr))
5884 && integer_zerop (DR_INIT (first_dr))
5885 && alias_sets_conflict_p (get_alias_set (aggr_type),
5886 get_alias_set (DR_REF (first_dr))))
5888 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5889 dataref_offset = build_int_cst (reference_alias_ptr_type
5890 (DR_REF (first_dr)), 0);
5891 inv_p = false;
5893 else
5894 dataref_ptr
5895 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5896 simd_lane_access_p ? loop : NULL,
5897 offset, &dummy, gsi, &ptr_incr,
5898 simd_lane_access_p, &inv_p);
5899 gcc_assert (bb_vinfo || !inv_p);
5901 else
5903 /* For interleaved stores we created vectorized defs for all the
5904 defs stored in OPRNDS in the previous iteration (previous copy).
5905 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5906 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5907 next copy.
5908 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5909 OPRNDS are of size 1. */
5910 for (i = 0; i < group_size; i++)
5912 op = oprnds[i];
5913 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5914 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5915 dr_chain[i] = vec_oprnd;
5916 oprnds[i] = vec_oprnd;
5918 if (dataref_offset)
5919 dataref_offset
5920 = int_const_binop (PLUS_EXPR, dataref_offset,
5921 TYPE_SIZE_UNIT (aggr_type));
5922 else
5923 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5924 TYPE_SIZE_UNIT (aggr_type));
5927 if (store_lanes_p)
5929 tree vec_array;
5931 /* Combine all the vectors into an array. */
5932 vec_array = create_vector_array (vectype, vec_num);
5933 for (i = 0; i < vec_num; i++)
5935 vec_oprnd = dr_chain[i];
5936 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5939 /* Emit:
5940 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5941 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5942 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5943 gimple_call_set_lhs (new_stmt, data_ref);
5944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5946 else
5948 new_stmt = NULL;
5949 if (grouped_store)
5951 if (j == 0)
5952 result_chain.create (group_size);
5953 /* Permute. */
5954 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5955 &result_chain);
5958 next_stmt = first_stmt;
5959 for (i = 0; i < vec_num; i++)
5961 unsigned align, misalign;
5963 if (i > 0)
5964 /* Bump the vector pointer. */
5965 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5966 stmt, NULL_TREE);
5968 if (slp)
5969 vec_oprnd = vec_oprnds[i];
5970 else if (grouped_store)
5971 /* For grouped stores vectorized defs are interleaved in
5972 vect_permute_store_chain(). */
5973 vec_oprnd = result_chain[i];
5975 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5976 dataref_ptr,
5977 dataref_offset
5978 ? dataref_offset
5979 : build_int_cst (reference_alias_ptr_type
5980 (DR_REF (first_dr)), 0));
5981 align = TYPE_ALIGN_UNIT (vectype);
5982 if (aligned_access_p (first_dr))
5983 misalign = 0;
5984 else if (DR_MISALIGNMENT (first_dr) == -1)
5986 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5987 align = TYPE_ALIGN_UNIT (elem_type);
5988 else
5989 align = get_object_alignment (DR_REF (first_dr))
5990 / BITS_PER_UNIT;
5991 misalign = 0;
5992 TREE_TYPE (data_ref)
5993 = build_aligned_type (TREE_TYPE (data_ref),
5994 align * BITS_PER_UNIT);
5996 else
5998 TREE_TYPE (data_ref)
5999 = build_aligned_type (TREE_TYPE (data_ref),
6000 TYPE_ALIGN (elem_type));
6001 misalign = DR_MISALIGNMENT (first_dr);
6003 if (dataref_offset == NULL_TREE
6004 && TREE_CODE (dataref_ptr) == SSA_NAME)
6005 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6006 misalign);
6008 if (negative
6009 && dt != vect_constant_def
6010 && dt != vect_external_def)
6012 tree perm_mask = perm_mask_for_reverse (vectype);
6013 tree perm_dest
6014 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6015 vectype);
6016 tree new_temp = make_ssa_name (perm_dest);
6018 /* Generate the permute statement. */
6019 gimple *perm_stmt
6020 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6021 vec_oprnd, perm_mask);
6022 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6024 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6025 vec_oprnd = new_temp;
6028 /* Arguments are ready. Create the new vector stmt. */
6029 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6032 if (slp)
6033 continue;
6035 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6036 if (!next_stmt)
6037 break;
6040 if (!slp)
6042 if (j == 0)
6043 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6044 else
6045 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6046 prev_stmt_info = vinfo_for_stmt (new_stmt);
6050 dr_chain.release ();
6051 oprnds.release ();
6052 result_chain.release ();
6053 vec_oprnds.release ();
6055 return true;
6058 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6059 VECTOR_CST mask. No checks are made that the target platform supports the
6060 mask, so callers may wish to test can_vec_perm_p separately, or use
6061 vect_gen_perm_mask_checked. */
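/* For example, for a four-element VECTYPE the selector {3, 2, 1, 0}
   yields the integer VECTOR_CST {3, 2, 1, 0}, which reverses the
   input when used as the selector of a VEC_PERM_EXPR (an illustrative
   sketch; the mask's element type is the integer type matching the
   mode of VECTYPE's elements).  */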
6063 tree
6064 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6066 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6067 int i, nunits;
6069 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6071 mask_elt_type = lang_hooks.types.type_for_mode
6072 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6073 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6075 mask_elts = XALLOCAVEC (tree, nunits);
6076 for (i = nunits - 1; i >= 0; i--)
6077 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6078 mask_vec = build_vector (mask_type, mask_elts);
6080 return mask_vec;
6083 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6084 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6086 tree
6087 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6089 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6090 return vect_gen_perm_mask_any (vectype, sel);
6093 /* Given vector variables X and Y that were generated for the scalar
6094 STMT, generate instructions to permute the vector elements of X and Y
6095 using permutation mask MASK_VEC, insert them at *GSI and return the
6096 permuted vector variable. */
6098 static tree
6099 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6100 gimple_stmt_iterator *gsi)
6102 tree vectype = TREE_TYPE (x);
6103 tree perm_dest, data_ref;
6104 gimple *perm_stmt;
6106 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6107 data_ref = make_ssa_name (perm_dest);
6109 /* Generate the permute statement. */
6110 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6111 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6113 return data_ref;
6116 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6117 inserting them on the loop's preheader edge. Returns true if we
6118 were successful in doing so (and thus STMT can then be moved),
6119 otherwise returns false. */
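/* For example (a hypothetical illustration), if STMT is the invariant
   load _2 = *p_1 and p_1 is defined inside LOOP by p_1 = &a + 4 with
   only invariant operands, the definition of p_1 can be moved to the
   preheader; if p_1 were defined by a PHI, or depended on another
   in-loop definition, we give up rather than recurse.  */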
6121 static bool
6122 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6124 ssa_op_iter i;
6125 tree op;
6126 bool any = false;
6128 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6130 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6131 if (!gimple_nop_p (def_stmt)
6132 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6134 /* Make sure we don't need to recurse. While we could do
6135 so in simple cases, for more complex use webs we don't
6136 have an easy way to preserve stmt order to fulfil
6137 dependencies within them. */
6138 tree op2;
6139 ssa_op_iter i2;
6140 if (gimple_code (def_stmt) == GIMPLE_PHI)
6141 return false;
6142 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6144 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6145 if (!gimple_nop_p (def_stmt2)
6146 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6147 return false;
6149 any = true;
6153 if (!any)
6154 return true;
6156 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6158 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6159 if (!gimple_nop_p (def_stmt)
6160 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6162 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6163 gsi_remove (&gsi, false);
6164 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6168 return true;
6171 /* vectorizable_load.
6173 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6174 can be vectorized.
6175 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6176 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6177 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6179 static bool
6180 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6181 slp_tree slp_node, slp_instance slp_node_instance)
6183 tree scalar_dest;
6184 tree vec_dest = NULL;
6185 tree data_ref = NULL;
6186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6187 stmt_vec_info prev_stmt_info;
6188 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6189 struct loop *loop = NULL;
6190 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6191 bool nested_in_vect_loop = false;
6192 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6193 tree elem_type;
6194 tree new_temp;
6195 machine_mode mode;
6196 gimple *new_stmt = NULL;
6197 tree dummy;
6198 enum dr_alignment_support alignment_support_scheme;
6199 tree dataref_ptr = NULL_TREE;
6200 tree dataref_offset = NULL_TREE;
6201 gimple *ptr_incr = NULL;
6202 int ncopies;
6203 int i, j, group_size = -1, group_gap_adj;
6204 tree msq = NULL_TREE, lsq;
6205 tree offset = NULL_TREE;
6206 tree byte_offset = NULL_TREE;
6207 tree realignment_token = NULL_TREE;
6208 gphi *phi = NULL;
6209 vec<tree> dr_chain = vNULL;
6210 bool grouped_load = false;
6211 bool load_lanes_p = false;
6212 gimple *first_stmt;
6213 gimple *first_stmt_for_drptr = NULL;
6214 bool inv_p;
6215 bool negative = false;
6216 bool compute_in_loop = false;
6217 struct loop *at_loop;
6218 int vec_num;
6219 bool slp = (slp_node != NULL);
6220 bool slp_perm = false;
6221 enum tree_code code;
6222 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6223 int vf;
6224 tree aggr_type;
6225 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6226 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6227 int gather_scale = 1;
6228 enum vect_def_type gather_dt = vect_unknown_def_type;
6229 vec_info *vinfo = stmt_info->vinfo;
6231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6232 return false;
6234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6235 && ! vec_stmt)
6236 return false;
6238 /* Is vectorizable load? */
6239 if (!is_gimple_assign (stmt))
6240 return false;
6242 scalar_dest = gimple_assign_lhs (stmt);
6243 if (TREE_CODE (scalar_dest) != SSA_NAME)
6244 return false;
6246 code = gimple_assign_rhs_code (stmt);
6247 if (code != ARRAY_REF
6248 && code != BIT_FIELD_REF
6249 && code != INDIRECT_REF
6250 && code != COMPONENT_REF
6251 && code != IMAGPART_EXPR
6252 && code != REALPART_EXPR
6253 && code != MEM_REF
6254 && TREE_CODE_CLASS (code) != tcc_declaration)
6255 return false;
6257 if (!STMT_VINFO_DATA_REF (stmt_info))
6258 return false;
6260 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6261 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6263 if (loop_vinfo)
6265 loop = LOOP_VINFO_LOOP (loop_vinfo);
6266 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6267 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6269 else
6270 vf = 1;
6272 /* Multiple types in SLP are handled by creating the appropriate number of
6273 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6274 case of SLP. */
6275 if (slp || PURE_SLP_STMT (stmt_info))
6276 ncopies = 1;
6277 else
6278 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6280 gcc_assert (ncopies >= 1);
6282 /* FORNOW. This restriction should be relaxed. */
6283 if (nested_in_vect_loop && ncopies > 1)
6285 if (dump_enabled_p ())
6286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6287 "multiple types in nested loop.\n");
6288 return false;
6291 /* Invalidate assumptions made by dependence analysis when vectorization
6292 on the unrolled body effectively re-orders stmts. */
6293 if (ncopies > 1
6294 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6295 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6296 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6298 if (dump_enabled_p ())
6299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6300 "cannot perform implicit CSE when unrolling "
6301 "with negative dependence distance\n");
6302 return false;
6305 elem_type = TREE_TYPE (vectype);
6306 mode = TYPE_MODE (vectype);
6308 /* FORNOW. In some cases can vectorize even if data-type not supported
6309 (e.g. - data copies). */
6310 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6312 if (dump_enabled_p ())
6313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6314 "Aligned load, but unsupported type.\n");
6315 return false;
6318 /* Check if the load is a part of an interleaving chain. */
6319 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6321 grouped_load = true;
6322 /* FORNOW */
6323 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6325 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6327 /* If this is single-element interleaving with an element distance
6328 that leaves unused vector loads around, punt - we would at least
6329 create very sub-optimal code in that case (and blow up memory,
6330 see PR65518). */
6331 bool force_peeling = false;
6332 if (first_stmt == stmt
6333 && !GROUP_NEXT_ELEMENT (stmt_info))
6335 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6337 if (dump_enabled_p ())
6338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6339 "single-element interleaving not supported "
6340 "for not adjacent vector loads\n");
6341 return false;
6344 /* Single-element interleaving requires peeling for gaps. */
6345 force_peeling = true;
6348 /* If there is a gap at the end of the group or the group size cannot
6349 be made a multiple of the vector element count then we access excess
6350 elements in the last iteration and thus need to peel that off. */
6351 if (loop_vinfo
6352 && ! STMT_VINFO_STRIDED_P (stmt_info)
6353 && (force_peeling
6354 || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6355 || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
6357 if (dump_enabled_p ())
6358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6359 "Data access with gaps requires scalar "
6360 "epilogue loop\n");
6361 if (loop->inner)
6363 if (dump_enabled_p ())
6364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6365 "Peeling for outer loop is not supported\n");
6366 return false;
6369 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6372 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6373 slp_perm = true;
6375 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6377 /* ??? The following is overly pessimistic (as well as the loop
6378 case above) in the case we can statically determine that the excess
6379 elements loaded are within the bounds of a decl that is accessed.
6380 Likewise, using masked loads would be a possibility for BB vectorization. */
6381 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6384 "BB vectorization with gaps at the end of a load "
6385 "is not supported\n");
6386 return false;
6389 if (!slp
6390 && !PURE_SLP_STMT (stmt_info)
6391 && !STMT_VINFO_STRIDED_P (stmt_info))
6393 if (vect_load_lanes_supported (vectype, group_size))
6394 load_lanes_p = true;
6395 else if (!vect_grouped_load_supported (vectype, group_size))
6396 return false;
6399 /* Invalidate assumptions made by dependence analysis when vectorization
6400 on the unrolled body effectively re-orders stmts. */
6401 if (!PURE_SLP_STMT (stmt_info)
6402 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6403 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6404 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6406 if (dump_enabled_p ())
6407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6408 "cannot perform implicit CSE when performing "
6409 "group loads with negative dependence distance\n");
6410 return false;
6413 /* Similarly when the stmt is a load that is both part of a SLP
6414 instance and a loop vectorized stmt via the same-dr mechanism
6415 we have to give up. */
6416 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6417 && (STMT_SLP_TYPE (stmt_info)
6418 != STMT_SLP_TYPE (vinfo_for_stmt
6419 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6423 "conflicting SLP types for CSEd load\n");
6424 return false;
6429 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6431 gimple *def_stmt;
6432 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6433 &gather_off, &gather_scale);
6434 gcc_assert (gather_decl);
6435 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6436 &gather_off_vectype))
6438 if (dump_enabled_p ())
6439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6440 "gather index use not simple.\n");
6441 return false;
6444 else if (STMT_VINFO_STRIDED_P (stmt_info))
6446 if ((grouped_load
6447 && (slp || PURE_SLP_STMT (stmt_info)))
6448 && (group_size > nunits
6449 || nunits % group_size != 0))
6451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6452 "unhandled strided group load\n");
6453 return false;
6456 else
6458 negative = tree_int_cst_compare (nested_in_vect_loop
6459 ? STMT_VINFO_DR_STEP (stmt_info)
6460 : DR_STEP (dr),
6461 size_zero_node) < 0;
6462 if (negative && ncopies > 1)
6464 if (dump_enabled_p ())
6465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6466 "multiple types with negative step.\n");
6467 return false;
6470 if (negative)
6472 if (grouped_load)
6474 if (dump_enabled_p ())
6475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6476 "negative step for group load not supported"
6477 "\n");
6478 return false;
6480 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6481 if (alignment_support_scheme != dr_aligned
6482 && alignment_support_scheme != dr_unaligned_supported)
6484 if (dump_enabled_p ())
6485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6486 "negative step but alignment required.\n");
6487 return false;
6489 if (!perm_mask_for_reverse (vectype))
6491 if (dump_enabled_p ())
6492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6493 "negative step and reversing not supported."
6494 "\n");
6495 return false;
6500 if (!vec_stmt) /* transformation not required. */
6502 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6503 /* The SLP costs are calculated during SLP analysis. */
6504 if (!PURE_SLP_STMT (stmt_info))
6505 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6506 NULL, NULL, NULL);
6507 return true;
6510 if (dump_enabled_p ())
6511 dump_printf_loc (MSG_NOTE, vect_location,
6512 "transform load. ncopies = %d\n", ncopies);
6514 /** Transform. **/
6516 ensure_base_align (stmt_info, dr);
6518 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6520 tree vec_oprnd0 = NULL_TREE, op;
6521 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6522 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6523 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6524 edge pe = loop_preheader_edge (loop);
6525 gimple_seq seq;
6526 basic_block new_bb;
6527 enum { NARROW, NONE, WIDEN } modifier;
6528 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6530 if (nunits == gather_off_nunits)
6531 modifier = NONE;
6532 else if (nunits == gather_off_nunits / 2)
6534 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6535 modifier = WIDEN;
6537 for (i = 0; i < gather_off_nunits; ++i)
6538 sel[i] = i | nunits;
6540 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6542 else if (nunits == gather_off_nunits * 2)
6544 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6545 modifier = NARROW;
6547 for (i = 0; i < nunits; ++i)
6548 sel[i] = i < gather_off_nunits
6549 ? i : i + nunits - gather_off_nunits;
6551 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6552 ncopies *= 2;
6554 else
6555 gcc_unreachable ();
6557 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6558 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6559 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6560 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6561 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6562 scaletype = TREE_VALUE (arglist);
6563 gcc_checking_assert (types_compatible_p (srctype, rettype));
6565 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6567 ptr = fold_convert (ptrtype, gather_base);
6568 if (!is_gimple_min_invariant (ptr))
6570 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6571 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6572 gcc_assert (!new_bb);
6575 /* Currently we support only unconditional gather loads,
6576 so mask should be all ones. */
6577 if (TREE_CODE (masktype) == INTEGER_TYPE)
6578 mask = build_int_cst (masktype, -1);
6579 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6581 mask = build_int_cst (TREE_TYPE (masktype), -1);
6582 mask = build_vector_from_val (masktype, mask);
6583 mask = vect_init_vector (stmt, mask, masktype, NULL);
6585 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6587 REAL_VALUE_TYPE r;
6588 long tmp[6];
6589 for (j = 0; j < 6; ++j)
6590 tmp[j] = -1;
6591 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6592 mask = build_real (TREE_TYPE (masktype), r);
6593 mask = build_vector_from_val (masktype, mask);
6594 mask = vect_init_vector (stmt, mask, masktype, NULL);
6596 else
6597 gcc_unreachable ();
6599 scale = build_int_cst (scaletype, gather_scale);
6601 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6602 merge = build_int_cst (TREE_TYPE (rettype), 0);
6603 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6605 REAL_VALUE_TYPE r;
6606 long tmp[6];
6607 for (j = 0; j < 6; ++j)
6608 tmp[j] = 0;
6609 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6610 merge = build_real (TREE_TYPE (rettype), r);
6612 else
6613 gcc_unreachable ();
6614 merge = build_vector_from_val (rettype, merge);
6615 merge = vect_init_vector (stmt, merge, rettype, NULL);
6617 prev_stmt_info = NULL;
6618 for (j = 0; j < ncopies; ++j)
6620 if (modifier == WIDEN && (j & 1))
6621 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6622 perm_mask, stmt, gsi);
6623 else if (j == 0)
6624 op = vec_oprnd0
6625 = vect_get_vec_def_for_operand (gather_off, stmt);
6626 else
6627 op = vec_oprnd0
6628 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6630 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6632 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6633 == TYPE_VECTOR_SUBPARTS (idxtype));
6634 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6635 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6636 new_stmt
6637 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6638 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6639 op = var;
6642 new_stmt
6643 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6645 if (!useless_type_conversion_p (vectype, rettype))
6647 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6648 == TYPE_VECTOR_SUBPARTS (rettype));
6649 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6650 gimple_call_set_lhs (new_stmt, op);
6651 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6652 var = make_ssa_name (vec_dest);
6653 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6654 new_stmt
6655 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6657 else
6659 var = make_ssa_name (vec_dest, new_stmt);
6660 gimple_call_set_lhs (new_stmt, var);
6663 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6665 if (modifier == NARROW)
6667 if ((j & 1) == 0)
6669 prev_res = var;
6670 continue;
6672 var = permute_vec_elements (prev_res, var,
6673 perm_mask, stmt, gsi);
6674 new_stmt = SSA_NAME_DEF_STMT (var);
6677 if (prev_stmt_info == NULL)
6678 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6679 else
6680 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6681 prev_stmt_info = vinfo_for_stmt (new_stmt);
6683 return true;
6685 else if (STMT_VINFO_STRIDED_P (stmt_info))
6687 gimple_stmt_iterator incr_gsi;
6688 bool insert_after;
6689 gimple *incr;
6690 tree offvar;
6691 tree ivstep;
6692 tree running_off;
6693 vec<constructor_elt, va_gc> *v = NULL;
6694 gimple_seq stmts = NULL;
6695 tree stride_base, stride_step, alias_off;
6697 gcc_assert (!nested_in_vect_loop);
6699 if (slp && grouped_load)
6700 first_dr = STMT_VINFO_DATA_REF
6701 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6702 else
6703 first_dr = dr;
6705 stride_base
6706 = fold_build_pointer_plus
6707 (DR_BASE_ADDRESS (first_dr),
6708 size_binop (PLUS_EXPR,
6709 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6710 convert_to_ptrofftype (DR_INIT (first_dr))));
6711 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6713 /* For a load with loop-invariant (but other than power-of-2)
6714 stride (i.e. not a grouped access) like so:
6716 for (i = 0; i < n; i += stride)
6717 ... = array[i];
6719 we generate a new induction variable and new accesses to
6720 form a new vector (or vectors, depending on ncopies):
6722 for (j = 0; ; j += VF*stride)
6723 tmp1 = array[j];
6724 tmp2 = array[j + stride];
6726 vectemp = {tmp1, tmp2, ...}
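   For example (a sketch of the code generated below), with a
   four-element vectype each vector iteration issues four scalar loads
   at running_off, running_off + stride_step, running_off +
   2*stride_step and running_off + 3*stride_step and assembles them
   into one vector with a CONSTRUCTOR.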
6729 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6730 build_int_cst (TREE_TYPE (stride_step), vf));
6732 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6734 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6735 loop, &incr_gsi, insert_after,
6736 &offvar, NULL);
6737 incr = gsi_stmt (incr_gsi);
6738 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6740 stride_step = force_gimple_operand (unshare_expr (stride_step),
6741 &stmts, true, NULL_TREE);
6742 if (stmts)
6743 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6745 prev_stmt_info = NULL;
6746 running_off = offvar;
6747 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6748 int nloads = nunits;
6749 tree ltype = TREE_TYPE (vectype);
6750 auto_vec<tree> dr_chain;
6751 if (slp)
6753 nloads = nunits / group_size;
6754 if (group_size < nunits)
6755 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6756 else
6757 ltype = vectype;
6758 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6759 /* For SLP permutation support we need to load the whole group,
6760 not only the number of vector stmts the permutation result
6761 fits in. */
6762 if (slp_perm)
6764 ncopies = (group_size * vf + nunits - 1) / nunits;
6765 dr_chain.create (ncopies);
6767 else
6768 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6770 for (j = 0; j < ncopies; j++)
6772 tree vec_inv;
6774 if (nloads > 1)
6776 vec_alloc (v, nloads);
6777 for (i = 0; i < nloads; i++)
6779 tree newref, newoff;
6780 gimple *incr;
6781 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6783 newref = force_gimple_operand_gsi (gsi, newref, true,
6784 NULL_TREE, true,
6785 GSI_SAME_STMT);
6786 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6787 newoff = copy_ssa_name (running_off);
6788 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6789 running_off, stride_step);
6790 vect_finish_stmt_generation (stmt, incr, gsi);
6792 running_off = newoff;
6795 vec_inv = build_constructor (vectype, v);
6796 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6797 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6799 else
6801 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6802 build2 (MEM_REF, ltype,
6803 running_off, alias_off));
6804 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6806 tree newoff = copy_ssa_name (running_off);
6807 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6808 running_off, stride_step);
6809 vect_finish_stmt_generation (stmt, incr, gsi);
6811 running_off = newoff;
6814 if (slp)
6816 if (slp_perm)
6817 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6818 else
6819 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6821 else
6823 if (j == 0)
6824 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6825 else
6826 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6827 prev_stmt_info = vinfo_for_stmt (new_stmt);
6830 if (slp_perm)
6831 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6832 slp_node_instance, false);
6833 return true;
6836 if (grouped_load)
6838 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6839 /* For SLP vectorization we directly vectorize a subchain
6840 without permutation. */
6841 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6842 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6843 /* For BB vectorization always use the first stmt to base
6844 the data ref pointer on. */
6845 if (bb_vinfo)
6846 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6848 /* Check if the chain of loads is already vectorized. */
6849 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6850 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6851 ??? But we can only do so if there is exactly one,
6852 as we have no way to get at the rest. Leave the CSE
6853 opportunity alone.
6854 ??? With the group load eventually participating
6855 in multiple different permutations (having multiple
6856 slp nodes which refer to the same group) the CSE
6857 would even generate wrong code. See PR56270. */
6858 && !slp)
6860 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6861 return true;
6863 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6864 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6865 group_gap_adj = 0;
6867 /* VEC_NUM is the number of vect stmts to be created for this group. */
6868 if (slp)
6870 grouped_load = false;
6871 /* For SLP permutation support we need to load the whole group,
6872 not only the number of vector stmts the permutation result
6873 fits in. */
6874 if (slp_perm)
6875 vec_num = (group_size * vf + nunits - 1) / nunits;
6876 else
6877 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6878 group_gap_adj = vf * group_size - nunits * vec_num;
6880 else
6881 vec_num = group_size;
6883 else
6885 first_stmt = stmt;
6886 first_dr = dr;
6887 group_size = vec_num = 1;
6888 group_gap_adj = 0;
6891 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6892 gcc_assert (alignment_support_scheme);
6893 /* Targets with load-lane instructions must not require explicit
6894 realignment. */
6895 gcc_assert (!load_lanes_p
6896 || alignment_support_scheme == dr_aligned
6897 || alignment_support_scheme == dr_unaligned_supported);
6899 /* In case the vectorization factor (VF) is bigger than the number
6900 of elements that we can fit in a vectype (nunits), we have to generate
6901 more than one vector stmt - i.e - we need to "unroll" the
6902 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6903 from one copy of the vector stmt to the next, in the field
6904 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6905 stages to find the correct vector defs to be used when vectorizing
6906 stmts that use the defs of the current stmt. The example below
6907 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6908 need to create 4 vectorized stmts):
6910 before vectorization:
6911 RELATED_STMT VEC_STMT
6912 S1: x = memref - -
6913 S2: z = x + 1 - -
6915 step 1: vectorize stmt S1:
6916 We first create the vector stmt VS1_0, and, as usual, record a
6917 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6918 Next, we create the vector stmt VS1_1, and record a pointer to
6919 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6920 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6921 stmts and pointers:
6922 RELATED_STMT VEC_STMT
6923 VS1_0: vx0 = memref0 VS1_1 -
6924 VS1_1: vx1 = memref1 VS1_2 -
6925 VS1_2: vx2 = memref2 VS1_3 -
6926 VS1_3: vx3 = memref3 - -
6927 S1: x = load - VS1_0
6928 S2: z = x + 1 - -
6930 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6931 information we recorded in RELATED_STMT field is used to vectorize
6932 stmt S2. */
6934 /* In case of interleaving (non-unit grouped access):
6936 S1: x2 = &base + 2
6937 S2: x0 = &base
6938 S3: x1 = &base + 1
6939 S4: x3 = &base + 3
6941 Vectorized loads are created in the order of memory accesses
6942 starting from the access of the first stmt of the chain:
6944 VS1: vx0 = &base
6945 VS2: vx1 = &base + vec_size*1
6946 VS3: vx3 = &base + vec_size*2
6947 VS4: vx4 = &base + vec_size*3
6949 Then permutation statements are generated:
6951 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6952 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6955 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6956 (the order of the data-refs in the output of vect_permute_load_chain
6957 corresponds to the order of scalar stmts in the interleaving chain - see
6958 the documentation of vect_permute_load_chain()).
6959 The generation of permutation stmts and recording them in
6960 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6962 In case of both multiple types and interleaving, the vector loads and
6963 permutation stmts above are created for every copy. The result vector
6964 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6965 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
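/* (In the VEC_PERM_EXPRs above, selector values below the vector
   length pick lanes from vx0 and the remaining values pick lanes
   from vx1, so VS5 extracts the even-indexed and VS6 the odd-indexed
   elements of the concatenation - the usual de-interleave for a
   group of size two.)  */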
6967 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6968 on a target that supports unaligned accesses (dr_unaligned_supported)
6969 we generate the following code:
6970 p = initial_addr;
6971 indx = 0;
6972 loop {
6973 p = p + indx * vectype_size;
6974 vec_dest = *(p);
6975 indx = indx + 1;
6978 Otherwise, the data reference is potentially unaligned on a target that
6979 does not support unaligned accesses (dr_explicit_realign_optimized) -
6980 then generate the following code, in which the data in each iteration is
6981 obtained by two vector loads, one from the previous iteration, and one
6982 from the current iteration:
6983 p1 = initial_addr;
6984 msq_init = *(floor(p1))
6985 p2 = initial_addr + VS - 1;
6986 realignment_token = call target_builtin;
6987 indx = 0;
6988 loop {
6989 p2 = p2 + indx * vectype_size
6990 lsq = *(floor(p2))
6991 vec_dest = realign_load (msq, lsq, realignment_token)
6992 indx = indx + 1;
6993 msq = lsq;
6994 } */
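/* For instance (an illustrative sketch), with 16-byte vectors and an
   initial address 4 bytes past a 16-byte boundary, msq covers bytes
   [addr-4, addr+12) and lsq covers bytes [addr+12, addr+28);
   REALIGN_LOAD then extracts the 16 bytes starting at addr using the
   realignment token computed from the address.  */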
6996 /* If the misalignment remains the same throughout the execution of the
6997 loop, we can create the init_addr and permutation mask at the loop
6998 preheader. Otherwise, they need to be created inside the loop.
6999 This can only occur when vectorizing memory accesses in the inner-loop
7000 nested within an outer-loop that is being vectorized. */
7002 if (nested_in_vect_loop
7003 && (TREE_INT_CST_LOW (DR_STEP (dr))
7004 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7006 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7007 compute_in_loop = true;
7010 if ((alignment_support_scheme == dr_explicit_realign_optimized
7011 || alignment_support_scheme == dr_explicit_realign)
7012 && !compute_in_loop)
7014 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7015 alignment_support_scheme, NULL_TREE,
7016 &at_loop);
7017 if (alignment_support_scheme == dr_explicit_realign_optimized)
7019 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7020 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7021 size_one_node);
7024 else
7025 at_loop = loop;
7027 if (negative)
7028 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7030 if (load_lanes_p)
7031 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7032 else
7033 aggr_type = vectype;
7035 prev_stmt_info = NULL;
7036 for (j = 0; j < ncopies; j++)
7038 /* 1. Create the vector or array pointer update chain. */
7039 if (j == 0)
7041 bool simd_lane_access_p
7042 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7043 if (simd_lane_access_p
7044 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7045 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7046 && integer_zerop (DR_OFFSET (first_dr))
7047 && integer_zerop (DR_INIT (first_dr))
7048 && alias_sets_conflict_p (get_alias_set (aggr_type),
7049 get_alias_set (DR_REF (first_dr)))
7050 && (alignment_support_scheme == dr_aligned
7051 || alignment_support_scheme == dr_unaligned_supported))
7053 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7054 dataref_offset = build_int_cst (reference_alias_ptr_type
7055 (DR_REF (first_dr)), 0);
7056 inv_p = false;
7058 else if (first_stmt_for_drptr
7059 && first_stmt != first_stmt_for_drptr)
7061 dataref_ptr
7062 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7063 at_loop, offset, &dummy, gsi,
7064 &ptr_incr, simd_lane_access_p,
7065 &inv_p, byte_offset);
7066 /* Adjust the pointer by the difference to first_stmt. */
7067 data_reference_p ptrdr
7068 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7069 tree diff = fold_convert (sizetype,
7070 size_binop (MINUS_EXPR,
7071 DR_INIT (first_dr),
7072 DR_INIT (ptrdr)));
7073 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7074 stmt, diff);
7076 else
7077 dataref_ptr
7078 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7079 offset, &dummy, gsi, &ptr_incr,
7080 simd_lane_access_p, &inv_p,
7081 byte_offset);
7083 else if (dataref_offset)
7084 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7085 TYPE_SIZE_UNIT (aggr_type));
7086 else
7087 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7088 TYPE_SIZE_UNIT (aggr_type));
7090 if (grouped_load || slp_perm)
7091 dr_chain.create (vec_num);
7093 if (load_lanes_p)
7095 tree vec_array;
7097 vec_array = create_vector_array (vectype, vec_num);
7099 /* Emit:
7100 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7101 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7102 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7103 gimple_call_set_lhs (new_stmt, vec_array);
7104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7106 /* Extract each vector into an SSA_NAME. */
7107 for (i = 0; i < vec_num; i++)
7109 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7110 vec_array, i);
7111 dr_chain.quick_push (new_temp);
7114 /* Record the mapping between SSA_NAMEs and statements. */
7115 vect_record_grouped_load_vectors (stmt, dr_chain);
7117 else
7119 for (i = 0; i < vec_num; i++)
7121 if (i > 0)
7122 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7123 stmt, NULL_TREE);
7125 /* 2. Create the vector-load in the loop. */
7126 switch (alignment_support_scheme)
7128 case dr_aligned:
7129 case dr_unaligned_supported:
7131 unsigned int align, misalign;
7133 data_ref
7134 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7135 dataref_offset
7136 ? dataref_offset
7137 : build_int_cst (reference_alias_ptr_type
7138 (DR_REF (first_dr)), 0));
7139 align = TYPE_ALIGN_UNIT (vectype);
7140 if (alignment_support_scheme == dr_aligned)
7142 gcc_assert (aligned_access_p (first_dr));
7143 misalign = 0;
7145 else if (DR_MISALIGNMENT (first_dr) == -1)
7147 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7148 align = TYPE_ALIGN_UNIT (elem_type);
7149 else
7150 align = (get_object_alignment (DR_REF (first_dr))
7151 / BITS_PER_UNIT);
7152 misalign = 0;
7153 TREE_TYPE (data_ref)
7154 = build_aligned_type (TREE_TYPE (data_ref),
7155 align * BITS_PER_UNIT);
7157 else
7159 TREE_TYPE (data_ref)
7160 = build_aligned_type (TREE_TYPE (data_ref),
7161 TYPE_ALIGN (elem_type));
7162 misalign = DR_MISALIGNMENT (first_dr);
7164 if (dataref_offset == NULL_TREE
7165 && TREE_CODE (dataref_ptr) == SSA_NAME)
7166 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7167 align, misalign);
7168 break;
7170 case dr_explicit_realign:
7172 tree ptr, bump;
7174 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7176 if (compute_in_loop)
7177 msq = vect_setup_realignment (first_stmt, gsi,
7178 &realignment_token,
7179 dr_explicit_realign,
7180 dataref_ptr, NULL);
7182 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7183 ptr = copy_ssa_name (dataref_ptr);
7184 else
7185 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7186 new_stmt = gimple_build_assign
7187 (ptr, BIT_AND_EXPR, dataref_ptr,
7188 build_int_cst
7189 (TREE_TYPE (dataref_ptr),
7190 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7191 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7192 data_ref
7193 = build2 (MEM_REF, vectype, ptr,
7194 build_int_cst (reference_alias_ptr_type
7195 (DR_REF (first_dr)), 0));
7196 vec_dest = vect_create_destination_var (scalar_dest,
7197 vectype);
7198 new_stmt = gimple_build_assign (vec_dest, data_ref);
7199 new_temp = make_ssa_name (vec_dest, new_stmt);
7200 gimple_assign_set_lhs (new_stmt, new_temp);
7201 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7202 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7204 msq = new_temp;
7206 bump = size_binop (MULT_EXPR, vs,
7207 TYPE_SIZE_UNIT (elem_type));
7208 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7209 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7210 new_stmt = gimple_build_assign
7211 (NULL_TREE, BIT_AND_EXPR, ptr,
7212 build_int_cst
7213 (TREE_TYPE (ptr),
7214 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7215 ptr = copy_ssa_name (ptr, new_stmt);
7216 gimple_assign_set_lhs (new_stmt, ptr);
7217 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7218 data_ref
7219 = build2 (MEM_REF, vectype, ptr,
7220 build_int_cst (reference_alias_ptr_type
7221 (DR_REF (first_dr)), 0));
7222 break;
7224 case dr_explicit_realign_optimized:
7225 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7226 new_temp = copy_ssa_name (dataref_ptr);
7227 else
7228 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7229 new_stmt = gimple_build_assign
7230 (new_temp, BIT_AND_EXPR, dataref_ptr,
7231 build_int_cst
7232 (TREE_TYPE (dataref_ptr),
7233 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7234 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7235 data_ref
7236 = build2 (MEM_REF, vectype, new_temp,
7237 build_int_cst (reference_alias_ptr_type
7238 (DR_REF (first_dr)), 0));
7239 break;
7240 default:
7241 gcc_unreachable ();
7243 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7244 new_stmt = gimple_build_assign (vec_dest, data_ref);
7245 new_temp = make_ssa_name (vec_dest, new_stmt);
7246 gimple_assign_set_lhs (new_stmt, new_temp);
7247 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7249 /* 3. Handle explicit realignment if necessary/supported.
7250 Create in loop:
7251 vec_dest = realign_load (msq, lsq, realignment_token) */
7252 if (alignment_support_scheme == dr_explicit_realign_optimized
7253 || alignment_support_scheme == dr_explicit_realign)
7255 lsq = gimple_assign_lhs (new_stmt);
7256 if (!realignment_token)
7257 realignment_token = dataref_ptr;
7258 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7259 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7260 msq, lsq, realignment_token);
7261 new_temp = make_ssa_name (vec_dest, new_stmt);
7262 gimple_assign_set_lhs (new_stmt, new_temp);
7263 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7265 if (alignment_support_scheme == dr_explicit_realign_optimized)
7267 gcc_assert (phi);
7268 if (i == vec_num - 1 && j == ncopies - 1)
7269 add_phi_arg (phi, lsq,
7270 loop_latch_edge (containing_loop),
7271 UNKNOWN_LOCATION);
7272 msq = lsq;
7276 /* 4. Handle invariant-load. */
7277 if (inv_p && !bb_vinfo)
7279 gcc_assert (!grouped_load);
7280 /* If we have versioned for aliasing or the loop doesn't
7281 have any data dependencies that would preclude this,
7282 then we are sure this is a loop invariant load and
7283 thus we can insert it on the preheader edge. */
7284 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7285 && !nested_in_vect_loop
7286 && hoist_defs_of_uses (stmt, loop))
7288 if (dump_enabled_p ())
7290 dump_printf_loc (MSG_NOTE, vect_location,
7291 "hoisting out of the vectorized "
7292 "loop: ");
7293 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7295 tree tem = copy_ssa_name (scalar_dest);
7296 gsi_insert_on_edge_immediate
7297 (loop_preheader_edge (loop),
7298 gimple_build_assign (tem,
7299 unshare_expr
7300 (gimple_assign_rhs1 (stmt))));
7301 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7302 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7303 set_vinfo_for_stmt (new_stmt,
7304 new_stmt_vec_info (new_stmt, vinfo));
7306 else
7308 gimple_stmt_iterator gsi2 = *gsi;
7309 gsi_next (&gsi2);
7310 new_temp = vect_init_vector (stmt, scalar_dest,
7311 vectype, &gsi2);
7312 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7316 if (negative)
7318 tree perm_mask = perm_mask_for_reverse (vectype);
7319 new_temp = permute_vec_elements (new_temp, new_temp,
7320 perm_mask, stmt, gsi);
7321 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7324 /* Collect vector loads and later create their permutation in
7325 vect_transform_grouped_load (). */
7326 if (grouped_load || slp_perm)
7327 dr_chain.quick_push (new_temp);
7329 /* Store vector loads in the corresponding SLP_NODE. */
7330 if (slp && !slp_perm)
7331 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7333 /* Bump the vector pointer to account for a gap or for excess
7334 elements loaded for a permuted SLP load. */
7335 if (group_gap_adj != 0)
7337 bool ovf;
7338 tree bump
7339 = wide_int_to_tree (sizetype,
7340 wi::smul (TYPE_SIZE_UNIT (elem_type),
7341 group_gap_adj, &ovf));
7342 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7343 stmt, bump);
7347 if (slp && !slp_perm)
7348 continue;
7350 if (slp_perm)
7352 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7353 slp_node_instance, false))
7355 dr_chain.release ();
7356 return false;
7359 else
7361 if (grouped_load)
7363 if (!load_lanes_p)
7364 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7365 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7367 else
7369 if (j == 0)
7370 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7371 else
7372 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7373 prev_stmt_info = vinfo_for_stmt (new_stmt);
7376 dr_chain.release ();
7379 return true;
7382 /* Function vect_is_simple_cond.
7384 Input:
7385 VINFO - the vect info of the loop or basic block that is being vectorized.
7386 COND - the condition that is checked for simple use.
7388 Output:
7389 *COMP_VECTYPE - the vector type for the comparison.
7391 Returns whether a COND can be vectorized. Checks whether
7392 condition operands are supportable using vect_is_simple_use. */
7394 static bool
7395 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7397 tree lhs, rhs;
7398 enum vect_def_type dt;
7399 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7401 /* Mask case. */
7402 if (TREE_CODE (cond) == SSA_NAME
7403 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7405 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7406 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7407 &dt, comp_vectype)
7408 || !*comp_vectype
7409 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7410 return false;
7411 return true;
7414 if (!COMPARISON_CLASS_P (cond))
7415 return false;
7417 lhs = TREE_OPERAND (cond, 0);
7418 rhs = TREE_OPERAND (cond, 1);
7420 if (TREE_CODE (lhs) == SSA_NAME)
7422 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7423 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7424 return false;
7426 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7427 && TREE_CODE (lhs) != FIXED_CST)
7428 return false;
7430 if (TREE_CODE (rhs) == SSA_NAME)
7432 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7433 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7434 return false;
7436 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7437 && TREE_CODE (rhs) != FIXED_CST)
7438 return false;
7440 if (vectype1 && vectype2
7441 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7442 return false;
7444 *comp_vectype = vectype1 ? vectype1 : vectype2;
7445 return true;
7448 /* vectorizable_condition.
7450 Check if STMT is conditional modify expression that can be vectorized.
7451 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7452 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7453 at GSI.
7455 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7456 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7457 the else clause if it is 2).
7459 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
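/* For illustration (a sketch; the names are made up): the scalar statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a vector statement of roughly the form

       vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   where the embedded comparison uses the mask type obtained from
   build_same_sized_truth_vector_type below.  */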
7461 bool
7462 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7463 gimple **vec_stmt, tree reduc_def, int reduc_index,
7464 slp_tree slp_node)
7466 tree scalar_dest = NULL_TREE;
7467 tree vec_dest = NULL_TREE;
7468 tree cond_expr, then_clause, else_clause;
7469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7470 tree comp_vectype = NULL_TREE;
7471 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7472 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7473 tree vec_compare;
7474 tree new_temp;
7475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7476 enum vect_def_type dt, dts[4];
7477 int ncopies;
7478 enum tree_code code;
7479 stmt_vec_info prev_stmt_info = NULL;
7480 int i, j;
7481 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7482 vec<tree> vec_oprnds0 = vNULL;
7483 vec<tree> vec_oprnds1 = vNULL;
7484 vec<tree> vec_oprnds2 = vNULL;
7485 vec<tree> vec_oprnds3 = vNULL;
7486 tree vec_cmp_type;
7487 bool masked = false;
7489 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7490 return false;
7492 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7494 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7495 return false;
7497 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7498 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7499 && reduc_def))
7500 return false;
7502 /* FORNOW: not yet supported. */
7503 if (STMT_VINFO_LIVE_P (stmt_info))
7505 if (dump_enabled_p ())
7506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7507 "value used after loop.\n");
7508 return false;
7512 /* Is this a vectorizable conditional operation? */
7513 if (!is_gimple_assign (stmt))
7514 return false;
7516 code = gimple_assign_rhs_code (stmt);
7518 if (code != COND_EXPR)
7519 return false;
7521 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7522 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7523 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7525 if (slp_node || PURE_SLP_STMT (stmt_info))
7526 ncopies = 1;
7527 else
7528 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7530 gcc_assert (ncopies >= 1);
7531 if (reduc_index && ncopies > 1)
7532 return false; /* FORNOW */
7534 cond_expr = gimple_assign_rhs1 (stmt);
7535 then_clause = gimple_assign_rhs2 (stmt);
7536 else_clause = gimple_assign_rhs3 (stmt);
7538 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7539 || !comp_vectype)
7540 return false;
7542 gimple *def_stmt;
7543 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7544 &vectype1))
7545 return false;
7546 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7547 &vectype2))
7548 return false;
7550 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7551 return false;
7553 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7554 return false;
7556 masked = !COMPARISON_CLASS_P (cond_expr);
7557 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7559 if (vec_cmp_type == NULL_TREE)
7560 return false;
7562 if (!vec_stmt)
7564 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7565 return expand_vec_cond_expr_p (vectype, comp_vectype);
7568 /* Transform. */
7570 if (!slp_node)
7572 vec_oprnds0.create (1);
7573 vec_oprnds1.create (1);
7574 vec_oprnds2.create (1);
7575 vec_oprnds3.create (1);
7578 /* Handle def. */
7579 scalar_dest = gimple_assign_lhs (stmt);
7580 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7582 /* Handle cond expr. */
7583 for (j = 0; j < ncopies; j++)
7585 gassign *new_stmt = NULL;
7586 if (j == 0)
7588 if (slp_node)
7590 auto_vec<tree, 4> ops;
7591 auto_vec<vec<tree>, 4> vec_defs;
7593 if (masked)
7594 ops.safe_push (cond_expr);
7595 else
7597 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7598 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7600 ops.safe_push (then_clause);
7601 ops.safe_push (else_clause);
7602 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7603 vec_oprnds3 = vec_defs.pop ();
7604 vec_oprnds2 = vec_defs.pop ();
7605 if (!masked)
7606 vec_oprnds1 = vec_defs.pop ();
7607 vec_oprnds0 = vec_defs.pop ();
7609 ops.release ();
7610 vec_defs.release ();
7612 else
7614 gimple *gtemp;
7615 if (masked)
7617 vec_cond_lhs
7618 = vect_get_vec_def_for_operand (cond_expr, stmt,
7619 comp_vectype);
7620 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7621 &gtemp, &dts[0]);
7623 else
7625 vec_cond_lhs =
7626 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7627 stmt, comp_vectype);
7628 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7629 loop_vinfo, &gtemp, &dts[0]);
7631 vec_cond_rhs =
7632 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7633 stmt, comp_vectype);
7634 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7635 loop_vinfo, &gtemp, &dts[1]);
7637 if (reduc_index == 1)
7638 vec_then_clause = reduc_def;
7639 else
7641 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7642 stmt);
7643 vect_is_simple_use (then_clause, loop_vinfo,
7644 &gtemp, &dts[2]);
7646 if (reduc_index == 2)
7647 vec_else_clause = reduc_def;
7648 else
7650 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7651 stmt);
7652 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7656 else
7658 vec_cond_lhs
7659 = vect_get_vec_def_for_stmt_copy (dts[0],
7660 vec_oprnds0.pop ());
7661 if (!masked)
7662 vec_cond_rhs
7663 = vect_get_vec_def_for_stmt_copy (dts[1],
7664 vec_oprnds1.pop ());
7666 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7667 vec_oprnds2.pop ());
7668 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7669 vec_oprnds3.pop ());
7672 if (!slp_node)
7674 vec_oprnds0.quick_push (vec_cond_lhs);
7675 if (!masked)
7676 vec_oprnds1.quick_push (vec_cond_rhs);
7677 vec_oprnds2.quick_push (vec_then_clause);
7678 vec_oprnds3.quick_push (vec_else_clause);
7681 /* Arguments are ready. Create the new vector stmt. */
7682 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7684 vec_then_clause = vec_oprnds2[i];
7685 vec_else_clause = vec_oprnds3[i];
7687 if (masked)
7688 vec_compare = vec_cond_lhs;
7689 else
7691 vec_cond_rhs = vec_oprnds1[i];
7692 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7693 vec_cond_lhs, vec_cond_rhs);
7695 new_temp = make_ssa_name (vec_dest);
7696 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7697 vec_compare, vec_then_clause,
7698 vec_else_clause);
7699 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7700 if (slp_node)
7701 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7704 if (slp_node)
7705 continue;
7707 if (j == 0)
7708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7709 else
7710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7712 prev_stmt_info = vinfo_for_stmt (new_stmt);
7715 vec_oprnds0.release ();
7716 vec_oprnds1.release ();
7717 vec_oprnds2.release ();
7718 vec_oprnds3.release ();
7720 return true;
7723 /* vectorizable_comparison.
7725 Check if STMT is a comparison expression that can be vectorized.
7726 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7727 comparison, put it in VEC_STMT, and insert it at GSI.
7729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
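/* For illustration (a sketch; the names are made up): a scalar statement

       b_1 = x_2 < y_3;

   with B_1 of boolean type is vectorized into

       vect_b = vect_x < vect_y;

   where VECT_B has a vector boolean (mask) type, ready to feed a
   VEC_COND_EXPR or a masked load/store later on.  */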
7731 bool
7732 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7733 gimple **vec_stmt, tree reduc_def,
7734 slp_tree slp_node)
7736 tree lhs, rhs1, rhs2;
7737 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7738 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7739 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7740 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7741 tree new_temp;
7742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7743 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7744 unsigned nunits;
7745 int ncopies;
7746 enum tree_code code;
7747 stmt_vec_info prev_stmt_info = NULL;
7748 int i, j;
7749 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7750 vec<tree> vec_oprnds0 = vNULL;
7751 vec<tree> vec_oprnds1 = vNULL;
7752 gimple *def_stmt;
7753 tree mask_type;
7754 tree mask;
7756 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7757 return false;
7759 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7760 return false;
7762 mask_type = vectype;
7763 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7765 if (slp_node || PURE_SLP_STMT (stmt_info))
7766 ncopies = 1;
7767 else
7768 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7770 gcc_assert (ncopies >= 1);
7771 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7772 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7773 && reduc_def))
7774 return false;
7776 if (STMT_VINFO_LIVE_P (stmt_info))
7778 if (dump_enabled_p ())
7779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7780 "value used after loop.\n");
7781 return false;
7784 if (!is_gimple_assign (stmt))
7785 return false;
7787 code = gimple_assign_rhs_code (stmt);
7789 if (TREE_CODE_CLASS (code) != tcc_comparison)
7790 return false;
7792 rhs1 = gimple_assign_rhs1 (stmt);
7793 rhs2 = gimple_assign_rhs2 (stmt);
7795 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7796 &dts[0], &vectype1))
7797 return false;
7799 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7800 &dts[1], &vectype2))
7801 return false;
7803 if (vectype1 && vectype2
7804 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7805 return false;
7807 vectype = vectype1 ? vectype1 : vectype2;
7809 /* Invariant comparison. */
7810 if (!vectype)
7812 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7813 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7814 return false;
7816 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7817 return false;
7819 if (!vec_stmt)
7821 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7822 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7823 return expand_vec_cmp_expr_p (vectype, mask_type);
7826 /* Transform. */
7827 if (!slp_node)
7829 vec_oprnds0.create (1);
7830 vec_oprnds1.create (1);
7833 /* Handle def. */
7834 lhs = gimple_assign_lhs (stmt);
7835 mask = vect_create_destination_var (lhs, mask_type);
7837 /* Handle cmp expr. */
7838 for (j = 0; j < ncopies; j++)
7840 gassign *new_stmt = NULL;
7841 if (j == 0)
7843 if (slp_node)
7845 auto_vec<tree, 2> ops;
7846 auto_vec<vec<tree>, 2> vec_defs;
7848 ops.safe_push (rhs1);
7849 ops.safe_push (rhs2);
7850 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7851 vec_oprnds1 = vec_defs.pop ();
7852 vec_oprnds0 = vec_defs.pop ();
7854 else
7856 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7857 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7860 else
7862 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7863 vec_oprnds0.pop ());
7864 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7865 vec_oprnds1.pop ());
7868 if (!slp_node)
7870 vec_oprnds0.quick_push (vec_rhs1);
7871 vec_oprnds1.quick_push (vec_rhs2);
7874 /* Arguments are ready. Create the new vector stmt. */
7875 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7877 vec_rhs2 = vec_oprnds1[i];
7879 new_temp = make_ssa_name (mask);
7880 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7881 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7882 if (slp_node)
7883 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7886 if (slp_node)
7887 continue;
7889 if (j == 0)
7890 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7891 else
7892 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7894 prev_stmt_info = vinfo_for_stmt (new_stmt);
7897 vec_oprnds0.release ();
7898 vec_oprnds1.release ();
7900 return true;
7903 /* Make sure the statement is vectorizable. */
7905 bool
7906 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7908 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7909 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7910 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7911 bool ok;
7912 tree scalar_type, vectype;
7913 gimple *pattern_stmt;
7914 gimple_seq pattern_def_seq;
7916 if (dump_enabled_p ())
7918 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7919 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7922 if (gimple_has_volatile_ops (stmt))
7924 if (dump_enabled_p ())
7925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7926 "not vectorized: stmt has volatile operands\n");
7928 return false;
7931 /* Skip stmts that do not need to be vectorized. In loops this is expected
7932 to include:
7933 - the COND_EXPR which is the loop exit condition
7934 - any LABEL_EXPRs in the loop
7935 - computations that are used only for array indexing or loop control.
7936 In basic blocks we only analyze statements that are a part of some SLP
7937 instance, therefore, all the statements are relevant.
7939 The pattern statement needs to be analyzed instead of the original statement
7940 if the original statement is not relevant. Otherwise, we analyze both
7941 statements. In basic blocks we are called from some SLP instance
7942 traversal; don't analyze pattern stmts instead, since the pattern stmts
7943 will already be part of an SLP instance. */
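/* For illustration (a sketch): in

       for (i = 0; i < n; i++)
         a[i] = b[i] + 1;

   the increment of I and the loop exit test only feed addressing and
   loop control, so they are not relevant and are skipped here; only
   the load of b[i], the addition and the store to a[i] are analyzed.  */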
7945 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7946 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7947 && !STMT_VINFO_LIVE_P (stmt_info))
7949 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7950 && pattern_stmt
7951 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7952 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7954 /* Analyze PATTERN_STMT instead of the original stmt. */
7955 stmt = pattern_stmt;
7956 stmt_info = vinfo_for_stmt (pattern_stmt);
7957 if (dump_enabled_p ())
7959 dump_printf_loc (MSG_NOTE, vect_location,
7960 "==> examining pattern statement: ");
7961 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7964 else
7966 if (dump_enabled_p ())
7967 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7969 return true;
7972 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7973 && node == NULL
7974 && pattern_stmt
7975 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7976 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7978 /* Analyze PATTERN_STMT too. */
7979 if (dump_enabled_p ())
7981 dump_printf_loc (MSG_NOTE, vect_location,
7982 "==> examining pattern statement: ");
7983 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7986 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7987 return false;
7990 if (is_pattern_stmt_p (stmt_info)
7991 && node == NULL
7992 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7994 gimple_stmt_iterator si;
7996 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7998 gimple *pattern_def_stmt = gsi_stmt (si);
7999 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8000 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8002 /* Analyze def stmt of STMT if it's a pattern stmt. */
8003 if (dump_enabled_p ())
8005 dump_printf_loc (MSG_NOTE, vect_location,
8006 "==> examining pattern def statement: ");
8007 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8010 if (!vect_analyze_stmt (pattern_def_stmt,
8011 need_to_vectorize, node))
8012 return false;
8017 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8019 case vect_internal_def:
8020 break;
8022 case vect_reduction_def:
8023 case vect_nested_cycle:
8024 gcc_assert (!bb_vinfo
8025 && (relevance == vect_used_in_outer
8026 || relevance == vect_used_in_outer_by_reduction
8027 || relevance == vect_used_by_reduction
8028 || relevance == vect_unused_in_scope));
8029 break;
8031 case vect_induction_def:
8032 case vect_constant_def:
8033 case vect_external_def:
8034 case vect_unknown_def_type:
8035 default:
8036 gcc_unreachable ();
8039 if (bb_vinfo)
8041 gcc_assert (PURE_SLP_STMT (stmt_info));
8043 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8044 if (dump_enabled_p ())
8046 dump_printf_loc (MSG_NOTE, vect_location,
8047 "get vectype for scalar type: ");
8048 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8049 dump_printf (MSG_NOTE, "\n");
8052 vectype = get_vectype_for_scalar_type (scalar_type);
8053 if (!vectype)
8055 if (dump_enabled_p ())
8057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8058 "not SLPed: unsupported data-type ");
8059 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8060 scalar_type);
8061 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8063 return false;
8066 if (dump_enabled_p ())
8068 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8069 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8070 dump_printf (MSG_NOTE, "\n");
8073 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8076 if (STMT_VINFO_RELEVANT_P (stmt_info))
8078 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8079 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8080 || (is_gimple_call (stmt)
8081 && gimple_call_lhs (stmt) == NULL_TREE));
8082 *need_to_vectorize = true;
8085 if (PURE_SLP_STMT (stmt_info) && !node)
8087 dump_printf_loc (MSG_NOTE, vect_location,
8088 "handled only by SLP analysis\n");
8089 return true;
8092 ok = true;
8093 if (!bb_vinfo
8094 && (STMT_VINFO_RELEVANT_P (stmt_info)
8095 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8096 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8097 || vectorizable_conversion (stmt, NULL, NULL, node)
8098 || vectorizable_shift (stmt, NULL, NULL, node)
8099 || vectorizable_operation (stmt, NULL, NULL, node)
8100 || vectorizable_assignment (stmt, NULL, NULL, node)
8101 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8102 || vectorizable_call (stmt, NULL, NULL, node)
8103 || vectorizable_store (stmt, NULL, NULL, node)
8104 || vectorizable_reduction (stmt, NULL, NULL, node)
8105 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8106 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8107 else
8109 if (bb_vinfo)
8110 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8111 || vectorizable_conversion (stmt, NULL, NULL, node)
8112 || vectorizable_shift (stmt, NULL, NULL, node)
8113 || vectorizable_operation (stmt, NULL, NULL, node)
8114 || vectorizable_assignment (stmt, NULL, NULL, node)
8115 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8116 || vectorizable_call (stmt, NULL, NULL, node)
8117 || vectorizable_store (stmt, NULL, NULL, node)
8118 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8119 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8122 if (!ok)
8124 if (dump_enabled_p ())
8126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8127 "not vectorized: relevant stmt not ");
8128 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8129 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8132 return false;
8135 if (bb_vinfo)
8136 return true;
8138 /* Stmts that are (also) "live" (i.e. used outside the loop)
8139 need extra handling, except for vectorizable reductions. */
8140 if (STMT_VINFO_LIVE_P (stmt_info)
8141 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8142 ok = vectorizable_live_operation (stmt, NULL, NULL);
8144 if (!ok)
8146 if (dump_enabled_p ())
8148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8149 "not vectorized: live stmt not ");
8150 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8151 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8154 return false;
8157 return true;
8161 /* Function vect_transform_stmt.
8163 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8165 bool
8166 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8167 bool *grouped_store, slp_tree slp_node,
8168 slp_instance slp_node_instance)
8170 bool is_store = false;
8171 gimple *vec_stmt = NULL;
8172 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8173 bool done;
8175 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8177 switch (STMT_VINFO_TYPE (stmt_info))
8179 case type_demotion_vec_info_type:
8180 case type_promotion_vec_info_type:
8181 case type_conversion_vec_info_type:
8182 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8183 gcc_assert (done);
8184 break;
8186 case induc_vec_info_type:
8187 gcc_assert (!slp_node);
8188 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8189 gcc_assert (done);
8190 break;
8192 case shift_vec_info_type:
8193 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8194 gcc_assert (done);
8195 break;
8197 case op_vec_info_type:
8198 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8199 gcc_assert (done);
8200 break;
8202 case assignment_vec_info_type:
8203 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8204 gcc_assert (done);
8205 break;
8207 case load_vec_info_type:
8208 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8209 slp_node_instance);
8210 gcc_assert (done);
8211 break;
8213 case store_vec_info_type:
8214 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8215 gcc_assert (done);
8216 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8218 /* In case of interleaving, the whole chain is vectorized when the
8219 last store in the chain is reached. Store stmts before the last
8220 one are skipped, and their vec_stmt_info shouldn't be freed
8221 meanwhile. */
8222 *grouped_store = true;
8223 if (STMT_VINFO_VEC_STMT (stmt_info))
8224 is_store = true;
8226 else
8227 is_store = true;
8228 break;
8230 case condition_vec_info_type:
8231 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8232 gcc_assert (done);
8233 break;
8235 case comparison_vec_info_type:
8236 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8237 gcc_assert (done);
8238 break;
8240 case call_vec_info_type:
8241 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8242 stmt = gsi_stmt (*gsi);
8243 if (is_gimple_call (stmt)
8244 && gimple_call_internal_p (stmt)
8245 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8246 is_store = true;
8247 break;
8249 case call_simd_clone_vec_info_type:
8250 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8251 stmt = gsi_stmt (*gsi);
8252 break;
8254 case reduc_vec_info_type:
8255 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8256 gcc_assert (done);
8257 break;
8259 default:
8260 if (!STMT_VINFO_LIVE_P (stmt_info))
8262 if (dump_enabled_p ())
8263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8264 "stmt not supported.\n");
8265 gcc_unreachable ();
8269 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8270 This would break hybrid SLP vectorization. */
8271 if (slp_node)
8272 gcc_assert (!vec_stmt
8273 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8275 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8276 is being vectorized, but outside the immediately enclosing loop. */
8277 if (vec_stmt
8278 && STMT_VINFO_LOOP_VINFO (stmt_info)
8279 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8280 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8281 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8282 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8283 || STMT_VINFO_RELEVANT (stmt_info) ==
8284 vect_used_in_outer_by_reduction))
8286 struct loop *innerloop = LOOP_VINFO_LOOP (
8287 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8288 imm_use_iterator imm_iter;
8289 use_operand_p use_p;
8290 tree scalar_dest;
8291 gimple *exit_phi;
8293 if (dump_enabled_p ())
8294 dump_printf_loc (MSG_NOTE, vect_location,
8295 "Record the vdef for outer-loop vectorization.\n");
8297 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8298 (to be used when vectorizing outer-loop stmts that use the DEF of
8299 STMT). */
8300 if (gimple_code (stmt) == GIMPLE_PHI)
8301 scalar_dest = PHI_RESULT (stmt);
8302 else
8303 scalar_dest = gimple_assign_lhs (stmt);
8305 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8307 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8309 exit_phi = USE_STMT (use_p);
8310 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8315 /* Handle stmts whose DEF is used outside the loop-nest that is
8316 being vectorized. */
8317 if (STMT_VINFO_LIVE_P (stmt_info)
8318 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8320 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
8321 gcc_assert (done);
8324 if (vec_stmt)
8325 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8327 return is_store;
8331 /* Remove a group of stores (for SLP or interleaving), free their
8332 stmt_vec_info. */
8334 void
8335 vect_remove_stores (gimple *first_stmt)
8337 gimple *next = first_stmt;
8338 gimple *tmp;
8339 gimple_stmt_iterator next_si;
8341 while (next)
8343 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8345 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8346 if (is_pattern_stmt_p (stmt_info))
8347 next = STMT_VINFO_RELATED_STMT (stmt_info);
8348 /* Free the attached stmt_vec_info and remove the stmt. */
8349 next_si = gsi_for_stmt (next);
8350 unlink_stmt_vdef (next);
8351 gsi_remove (&next_si, true);
8352 release_defs (next);
8353 free_stmt_vec_info (next);
8354 next = tmp;
8359 /* Function new_stmt_vec_info.
8361 Create and initialize a new stmt_vec_info struct for STMT. */
8363 stmt_vec_info
8364 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8366 stmt_vec_info res;
8367 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8369 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8370 STMT_VINFO_STMT (res) = stmt;
8371 res->vinfo = vinfo;
8372 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8373 STMT_VINFO_LIVE_P (res) = false;
8374 STMT_VINFO_VECTYPE (res) = NULL;
8375 STMT_VINFO_VEC_STMT (res) = NULL;
8376 STMT_VINFO_VECTORIZABLE (res) = true;
8377 STMT_VINFO_IN_PATTERN_P (res) = false;
8378 STMT_VINFO_RELATED_STMT (res) = NULL;
8379 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8380 STMT_VINFO_DATA_REF (res) = NULL;
8381 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8383 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8384 STMT_VINFO_DR_OFFSET (res) = NULL;
8385 STMT_VINFO_DR_INIT (res) = NULL;
8386 STMT_VINFO_DR_STEP (res) = NULL;
8387 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8389 if (gimple_code (stmt) == GIMPLE_PHI
8390 && is_loop_header_bb_p (gimple_bb (stmt)))
8391 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8392 else
8393 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8395 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8396 STMT_SLP_TYPE (res) = loop_vect;
8397 STMT_VINFO_NUM_SLP_USES (res) = 0;
8399 GROUP_FIRST_ELEMENT (res) = NULL;
8400 GROUP_NEXT_ELEMENT (res) = NULL;
8401 GROUP_SIZE (res) = 0;
8402 GROUP_STORE_COUNT (res) = 0;
8403 GROUP_GAP (res) = 0;
8404 GROUP_SAME_DR_STMT (res) = NULL;
8406 return res;
8410 /* Create the global vector holding stmt_vec_info structs. */
8412 void
8413 init_stmt_vec_info_vec (void)
8415 gcc_assert (!stmt_vec_info_vec.exists ());
8416 stmt_vec_info_vec.create (50);
8420 /* Free the global vector holding stmt_vec_info structs. */
8422 void
8423 free_stmt_vec_info_vec (void)
8425 unsigned int i;
8426 stmt_vec_info info;
8427 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8428 if (info != NULL)
8429 free_stmt_vec_info (STMT_VINFO_STMT (info));
8430 gcc_assert (stmt_vec_info_vec.exists ());
8431 stmt_vec_info_vec.release ();
8435 /* Free stmt vectorization related info. */
8437 void
8438 free_stmt_vec_info (gimple *stmt)
8440 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8442 if (!stmt_info)
8443 return;
8445 /* Check if this statement has a related "pattern stmt"
8446 (introduced by the vectorizer during the pattern recognition
8447 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8448 too. */
8449 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8451 stmt_vec_info patt_info
8452 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8453 if (patt_info)
8455 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8456 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8457 gimple_set_bb (patt_stmt, NULL);
8458 tree lhs = gimple_get_lhs (patt_stmt);
8459 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8460 release_ssa_name (lhs);
8461 if (seq)
8463 gimple_stmt_iterator si;
8464 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8466 gimple *seq_stmt = gsi_stmt (si);
8467 gimple_set_bb (seq_stmt, NULL);
8468 lhs = gimple_get_lhs (seq_stmt);
8469 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8470 release_ssa_name (lhs);
8471 free_stmt_vec_info (seq_stmt);
8474 free_stmt_vec_info (patt_stmt);
8478 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8479 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8480 set_vinfo_for_stmt (stmt, NULL);
8481 free (stmt_info);
8485 /* Function get_vectype_for_scalar_type_and_size.
8487 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8488 by the target. */
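/* For illustration (assuming a target with 128-bit vectors): for
   SCALAR_TYPE int and SIZE 16 this returns a 4-element integer vector
   type (V4SImode on such a target); with SIZE 0 the target's preferred
   SIMD mode for SImode is used instead.  */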
8490 static tree
8491 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8493 machine_mode inner_mode = TYPE_MODE (scalar_type);
8494 machine_mode simd_mode;
8495 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8496 int nunits;
8497 tree vectype;
8499 if (nbytes == 0)
8500 return NULL_TREE;
8502 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8503 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8504 return NULL_TREE;
8506 /* For vector types of elements whose mode precision doesn't
8507 match their type's precision we use an element type of mode
8508 precision. The vectorization routines will have to make sure
8509 they support the proper result truncation/extension.
8510 We also make sure to build vector types with INTEGER_TYPE
8511 component type only. */
8512 if (INTEGRAL_TYPE_P (scalar_type)
8513 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8514 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8515 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8516 TYPE_UNSIGNED (scalar_type));
8518 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8519 When the component mode passes the above test, simply use a type
8520 corresponding to that mode. The theory is that any use that
8521 would cause problems with this will disable vectorization anyway. */
8522 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8523 && !INTEGRAL_TYPE_P (scalar_type))
8524 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8526 /* We can't build a vector type of elements with alignment bigger than
8527 their size. */
8528 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8529 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8530 TYPE_UNSIGNED (scalar_type));
8532 /* If we fell back to using the mode, fail if there was
8533 no scalar type for it. */
8534 if (scalar_type == NULL_TREE)
8535 return NULL_TREE;
8537 /* If no size was supplied use the mode the target prefers. Otherwise
8538 look up a vector mode of the specified size. */
8539 if (size == 0)
8540 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8541 else
8542 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8543 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8544 if (nunits <= 1)
8545 return NULL_TREE;
8547 vectype = build_vector_type (scalar_type, nunits);
8549 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8550 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8551 return NULL_TREE;
8553 return vectype;
8556 unsigned int current_vector_size;
8558 /* Function get_vectype_for_scalar_type.
8560 Returns the vector type corresponding to SCALAR_TYPE as supported
8561 by the target. */
8563 tree
8564 get_vectype_for_scalar_type (tree scalar_type)
8566 tree vectype;
8567 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8568 current_vector_size);
8569 if (vectype
8570 && current_vector_size == 0)
8571 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8572 return vectype;
8575 /* Function get_mask_type_for_scalar_type.
8577 Returns the mask type corresponding to the result of a comparison
8578 of vectors of the specified SCALAR_TYPE, as supported by the target. */
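/* For illustration (target dependent): with 16-byte vectors and
   SCALAR_TYPE int, the data vector has 4 elements, so the returned
   mask type is a 4-element boolean vector; whether that is laid out
   as an integer vector or as a dedicated mask mode is up to the
   target.  */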
8580 tree
8581 get_mask_type_for_scalar_type (tree scalar_type)
8583 tree vectype = get_vectype_for_scalar_type (scalar_type);
8585 if (!vectype)
8586 return NULL;
8588 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8589 current_vector_size);
8592 /* Function get_same_sized_vectype
8594 Returns a vector type corresponding to SCALAR_TYPE of size
8595 VECTOR_TYPE if supported by the target. */
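/* For illustration (a sketch): for SCALAR_TYPE short and a 16-byte
   float VECTOR_TYPE this returns an 8-element short vector of the
   same 16-byte size; for a boolean SCALAR_TYPE the same-sized truth
   vector type is returned instead.  */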
8597 tree
8598 get_same_sized_vectype (tree scalar_type, tree vector_type)
8600 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8601 return build_same_sized_truth_vector_type (vector_type);
8603 return get_vectype_for_scalar_type_and_size
8604 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8607 /* Function vect_is_simple_use.
8609 Input:
8610 VINFO - the vect info of the loop or basic block that is being vectorized.
8611 OPERAND - operand in the loop or bb.
8612 Output:
8613 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8614 DT - the type of definition
8616 Returns whether a stmt with OPERAND can be vectorized.
8617 For loops, supportable operands are constants, loop invariants, and operands
8618 that are defined by the current iteration of the loop. Unsupportable
8619 operands are those that are defined by a previous iteration of the loop (as
8620 is the case in reduction/induction computations).
8621 For basic blocks, supportable operands are constants and bb invariants.
8622 For now, operands defined outside the basic block are not supported. */
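/* For illustration (a sketch; the names are made up): given

       s_1 = PHI <s_0, s_2>
       t_3 = a[i_4] * 4;
       s_2 = s_1 + t_3;

   the use of t_3 in the addition is a simple internal use, the
   constant 4 is vect_constant_def, a name defined before the loop
   would be vect_external_def, and the use of s_1 is classified by
   the def type of its PHI (e.g. vect_reduction_def).  */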
8624 bool
8625 vect_is_simple_use (tree operand, vec_info *vinfo,
8626 gimple **def_stmt, enum vect_def_type *dt)
8628 *def_stmt = NULL;
8629 *dt = vect_unknown_def_type;
8631 if (dump_enabled_p ())
8633 dump_printf_loc (MSG_NOTE, vect_location,
8634 "vect_is_simple_use: operand ");
8635 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8636 dump_printf (MSG_NOTE, "\n");
8639 if (CONSTANT_CLASS_P (operand))
8641 *dt = vect_constant_def;
8642 return true;
8645 if (is_gimple_min_invariant (operand))
8647 *dt = vect_external_def;
8648 return true;
8651 if (TREE_CODE (operand) != SSA_NAME)
8653 if (dump_enabled_p ())
8654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8655 "not ssa-name.\n");
8656 return false;
8659 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8661 *dt = vect_external_def;
8662 return true;
8665 *def_stmt = SSA_NAME_DEF_STMT (operand);
8666 if (dump_enabled_p ())
8668 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8672 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8673 *dt = vect_external_def;
8674 else
8676 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8677 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8680 if (dump_enabled_p ())
8682 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8683 switch (*dt)
8685 case vect_uninitialized_def:
8686 dump_printf (MSG_NOTE, "uninitialized\n");
8687 break;
8688 case vect_constant_def:
8689 dump_printf (MSG_NOTE, "constant\n");
8690 break;
8691 case vect_external_def:
8692 dump_printf (MSG_NOTE, "external\n");
8693 break;
8694 case vect_internal_def:
8695 dump_printf (MSG_NOTE, "internal\n");
8696 break;
8697 case vect_induction_def:
8698 dump_printf (MSG_NOTE, "induction\n");
8699 break;
8700 case vect_reduction_def:
8701 dump_printf (MSG_NOTE, "reduction\n");
8702 break;
8703 case vect_double_reduction_def:
8704 dump_printf (MSG_NOTE, "double reduction\n");
8705 break;
8706 case vect_nested_cycle:
8707 dump_printf (MSG_NOTE, "nested cycle\n");
8708 break;
8709 case vect_unknown_def_type:
8710 dump_printf (MSG_NOTE, "unknown\n");
8711 break;
8715 if (*dt == vect_unknown_def_type)
8717 if (dump_enabled_p ())
8718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8719 "Unsupported pattern.\n");
8720 return false;
8723 switch (gimple_code (*def_stmt))
8725 case GIMPLE_PHI:
8726 case GIMPLE_ASSIGN:
8727 case GIMPLE_CALL:
8728 break;
8729 default:
8730 if (dump_enabled_p ())
8731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8732 "unsupported defining stmt:\n");
8733 return false;
8736 return true;
8739 /* Function vect_is_simple_use.
8741 Same as vect_is_simple_use but also determines the vector operand
8742 type of OPERAND and stores it to *VECTYPE. If the definition of
8743 OPERAND is vect_uninitialized_def, vect_constant_def or
8744 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8745 is responsible for computing the best-suited vector type for the
8746 scalar operand. */
8748 bool
8749 vect_is_simple_use (tree operand, vec_info *vinfo,
8750 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8752 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8753 return false;
8755 /* Now get a vector type if the def is internal, otherwise supply
8756 NULL_TREE and leave it up to the caller to figure out a proper
8757 type for the use stmt. */
8758 if (*dt == vect_internal_def
8759 || *dt == vect_induction_def
8760 || *dt == vect_reduction_def
8761 || *dt == vect_double_reduction_def
8762 || *dt == vect_nested_cycle)
8764 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8766 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8767 && !STMT_VINFO_RELEVANT (stmt_info)
8768 && !STMT_VINFO_LIVE_P (stmt_info))
8769 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8771 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8772 gcc_assert (*vectype != NULL_TREE);
8774 else if (*dt == vect_uninitialized_def
8775 || *dt == vect_constant_def
8776 || *dt == vect_external_def)
8777 *vectype = NULL_TREE;
8778 else
8779 gcc_unreachable ();
8781 return true;
8785 /* Function supportable_widening_operation
8787 Check whether an operation represented by the code CODE is a
8788 widening operation that is supported by the target platform in
8789 vector form (i.e., when operating on arguments of type VECTYPE_IN
8790 producing a result of type VECTYPE_OUT).
8792 Widening operations we currently support are NOP (CONVERT), FLOAT
8793 and WIDEN_MULT. This function checks if these operations are supported
8794 by the target platform either directly (via vector tree-codes), or via
8795 target builtins.
8797 Output:
8798 - CODE1 and CODE2 are codes of vector operations to be used when
8799 vectorizing the operation, if available.
8800 - MULTI_STEP_CVT determines the number of required intermediate steps in
8801 case of multi-step conversion (like char->short->int - in that case
8802 MULTI_STEP_CVT will be 1).
8803 - INTERM_TYPES contains the intermediate type required to perform the
8804 widening operation (short in the above example). */
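/* For illustration (a sketch): a widening conversion from a V16QI
   vector to V8HI results is done with an unpack pair

       vect_lo = VEC_UNPACK_LO_EXPR <vect_a>;
       vect_hi = VEC_UNPACK_HI_EXPR <vect_a>;

   and for char->int the sequence goes through short first, so
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short-element
   intermediate vector type.  */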
8806 bool
8807 supportable_widening_operation (enum tree_code code, gimple *stmt,
8808 tree vectype_out, tree vectype_in,
8809 enum tree_code *code1, enum tree_code *code2,
8810 int *multi_step_cvt,
8811 vec<tree> *interm_types)
8813 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8814 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8815 struct loop *vect_loop = NULL;
8816 machine_mode vec_mode;
8817 enum insn_code icode1, icode2;
8818 optab optab1, optab2;
8819 tree vectype = vectype_in;
8820 tree wide_vectype = vectype_out;
8821 enum tree_code c1, c2;
8822 int i;
8823 tree prev_type, intermediate_type;
8824 machine_mode intermediate_mode, prev_mode;
8825 optab optab3, optab4;
8827 *multi_step_cvt = 0;
8828 if (loop_info)
8829 vect_loop = LOOP_VINFO_LOOP (loop_info);
8831 switch (code)
8833 case WIDEN_MULT_EXPR:
8834 /* The result of a vectorized widening operation usually requires
8835 two vectors (because the widened results do not fit into one vector).
8836 The generated vector results would normally be expected to be
8837 generated in the same order as in the original scalar computation,
8838 i.e. if 8 results are generated in each vector iteration, they are
8839 to be organized as follows:
8840 vect1: [res1,res2,res3,res4],
8841 vect2: [res5,res6,res7,res8].
8843 However, in the special case that the result of the widening
8844 operation is used in a reduction computation only, the order doesn't
8845 matter (because when vectorizing a reduction we change the order of
8846 the computation). Some targets can take advantage of this and
8847 generate more efficient code. For example, targets like Altivec,
8848 that support widen_mult using a sequence of {mult_even,mult_odd}
8849 generate the following vectors:
8850 vect1: [res1,res3,res5,res7],
8851 vect2: [res2,res4,res6,res8].
8853 When vectorizing outer-loops, we execute the inner-loop sequentially
8854 (each vectorized inner-loop iteration contributes to VF outer-loop
8855 iterations in parallel). We therefore don't allow changing the
8856 order of the computation in the inner-loop during outer-loop
8857 vectorization. */
8858 /* TODO: Another case in which order doesn't *really* matter is when we
8859 widen and then contract again, e.g. (short)((int)x * y >> 8).
8860 Normally, pack_trunc performs an even/odd permute, whereas the
8861 repack from an even/odd expansion would be an interleave, which
8862 would be significantly simpler for e.g. AVX2. */
8863 /* In any case, in order to avoid duplicating the code below, recurse
8864 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8865 are properly set up for the caller. If we fail, we'll continue with
8866 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8867 if (vect_loop
8868 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8869 && !nested_in_vect_loop_p (vect_loop, stmt)
8870 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8871 stmt, vectype_out, vectype_in,
8872 code1, code2, multi_step_cvt,
8873 interm_types))
8875 /* Elements in a vector with the vect_used_by_reduction property cannot
8876 be reordered if the use chain with this property does not have the
8877 same operation. One such example is s += a * b, where elements
8878 in a and b cannot be reordered. Here we check if the vector defined
8879 by STMT is only directly used in the reduction statement. */
8880 tree lhs = gimple_assign_lhs (stmt);
8881 use_operand_p dummy;
8882 gimple *use_stmt;
8883 stmt_vec_info use_stmt_info = NULL;
8884 if (single_imm_use (lhs, &dummy, &use_stmt)
8885 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8886 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8887 return true;
8889 c1 = VEC_WIDEN_MULT_LO_EXPR;
8890 c2 = VEC_WIDEN_MULT_HI_EXPR;
8891 break;
8893 case DOT_PROD_EXPR:
8894 c1 = DOT_PROD_EXPR;
8895 c2 = DOT_PROD_EXPR;
8896 break;
8898 case SAD_EXPR:
8899 c1 = SAD_EXPR;
8900 c2 = SAD_EXPR;
8901 break;
8903 case VEC_WIDEN_MULT_EVEN_EXPR:
8904 /* Support the recursion induced just above. */
8905 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8906 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8907 break;
8909 case WIDEN_LSHIFT_EXPR:
8910 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8911 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8912 break;
8914 CASE_CONVERT:
8915 c1 = VEC_UNPACK_LO_EXPR;
8916 c2 = VEC_UNPACK_HI_EXPR;
8917 break;
8919 case FLOAT_EXPR:
8920 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8921 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8922 break;
8924 case FIX_TRUNC_EXPR:
8925 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8926 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8927 computing the operation. */
8928 return false;
8930 default:
8931 gcc_unreachable ();
8934 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8935 std::swap (c1, c2);
8937 if (code == FIX_TRUNC_EXPR)
8939 /* The signedness is determined from the output operand. */
8940 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8941 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8943 else
8945 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8946 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8949 if (!optab1 || !optab2)
8950 return false;
8952 vec_mode = TYPE_MODE (vectype);
8953 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8954 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8955 return false;
8957 *code1 = c1;
8958 *code2 = c2;
8960 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8961 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8962 /* For scalar masks we may have different boolean
8963 vector types having the same QImode. Thus we
8964 add an additional check on the number of elements. */
8965 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
8966 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
8967 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
8969 /* Check if it's a multi-step conversion that can be done using intermediate
8970 types. */
8972 prev_type = vectype;
8973 prev_mode = vec_mode;
8975 if (!CONVERT_EXPR_CODE_P (code))
8976 return false;
8978 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8979 intermediate steps in the promotion sequence. We try
8980 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8981 not. */
8982 interm_types->create (MAX_INTERM_CVT_STEPS);
8983 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8985 intermediate_mode = insn_data[icode1].operand[0].mode;
8986 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
8988 intermediate_type
8989 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
8990 current_vector_size);
8991 if (intermediate_mode != TYPE_MODE (intermediate_type))
8992 return false;
8994 else
8995 intermediate_type
8996 = lang_hooks.types.type_for_mode (intermediate_mode,
8997 TYPE_UNSIGNED (prev_type));
8999 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9000 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9002 if (!optab3 || !optab4
9003 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9004 || insn_data[icode1].operand[0].mode != intermediate_mode
9005 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9006 || insn_data[icode2].operand[0].mode != intermediate_mode
9007 || ((icode1 = optab_handler (optab3, intermediate_mode))
9008 == CODE_FOR_nothing)
9009 || ((icode2 = optab_handler (optab4, intermediate_mode))
9010 == CODE_FOR_nothing))
9011 break;
9013 interm_types->quick_push (intermediate_type);
9014 (*multi_step_cvt)++;
9016 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9017 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9018 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9019 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9020 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9022 prev_type = intermediate_type;
9023 prev_mode = intermediate_mode;
9026 interm_types->release ();
9027 return false;
9031 /* Function supportable_narrowing_operation
9033 Check whether an operation represented by the code CODE is a
9034 narrowing operation that is supported by the target platform in
9035 vector form (i.e., when operating on arguments of type VECTYPE_IN
9036 and producing a result of type VECTYPE_OUT).
9038 Narrowing operations we currently support are NOP (CONVERT) and
9039 FIX_TRUNC. This function checks if these operations are supported by
9040 the target platform directly via vector tree-codes.
9042 Output:
9043 - CODE1 is the code of a vector operation to be used when
9044 vectorizing the operation, if available.
9045 - MULTI_STEP_CVT determines the number of required intermediate steps in
9046 case of multi-step conversion (like int->short->char - in that case
9047 MULTI_STEP_CVT will be 1).
9048 - INTERM_TYPES contains the intermediate type required to perform the
9049 narrowing operation (short in the above example). */
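/* For illustration (a sketch): narrowing two V8HI vectors into one
   V16QI result is done with

       vect_res = VEC_PACK_TRUNC_EXPR <vect_a, vect_b>;

   and for int->char the sequence goes through short first, so
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short-element
   intermediate vector type.  */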
9051 bool
9052 supportable_narrowing_operation (enum tree_code code,
9053 tree vectype_out, tree vectype_in,
9054 enum tree_code *code1, int *multi_step_cvt,
9055 vec<tree> *interm_types)
9057 machine_mode vec_mode;
9058 enum insn_code icode1;
9059 optab optab1, interm_optab;
9060 tree vectype = vectype_in;
9061 tree narrow_vectype = vectype_out;
9062 enum tree_code c1;
9063 tree intermediate_type, prev_type;
9064 machine_mode intermediate_mode, prev_mode;
9065 int i;
9066 bool uns;
9068 *multi_step_cvt = 0;
9069 switch (code)
9071 CASE_CONVERT:
9072 c1 = VEC_PACK_TRUNC_EXPR;
9073 break;
9075 case FIX_TRUNC_EXPR:
9076 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9077 break;
9079 case FLOAT_EXPR:
9080 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9081 tree code and optabs used for computing the operation. */
9082 return false;
9084 default:
9085 gcc_unreachable ();
9088 if (code == FIX_TRUNC_EXPR)
9089 /* The signedness is determined from the output operand. */
9090 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9091 else
9092 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9094 if (!optab1)
9095 return false;
9097 vec_mode = TYPE_MODE (vectype);
9098 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9099 return false;
9101 *code1 = c1;
9103 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9104 /* For scalar masks we may have different boolean
9105 vector types having the same QImode. Thus we
9106 add an additional check on the number of elements. */
9107 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9108 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9109 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9111 /* Check if it's a multi-step conversion that can be done using intermediate
9112 types. */
9113 prev_mode = vec_mode;
9114 prev_type = vectype;
9115 if (code == FIX_TRUNC_EXPR)
9116 uns = TYPE_UNSIGNED (vectype_out);
9117 else
9118 uns = TYPE_UNSIGNED (vectype);
9120 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
9121 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
9122 costly than signed. */
9123 if (code == FIX_TRUNC_EXPR && uns)
9125 enum insn_code icode2;
9127 intermediate_type
9128 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9129 interm_optab
9130 = optab_for_tree_code (c1, intermediate_type, optab_default);
9131 if (interm_optab != unknown_optab
9132 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9133 && insn_data[icode1].operand[0].mode
9134 == insn_data[icode2].operand[0].mode)
9136 uns = false;
9137 optab1 = interm_optab;
9138 icode1 = icode2;
9142 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9143 intermediate steps in the narrowing sequence. We try
9144 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9145 interm_types->create (MAX_INTERM_CVT_STEPS);
9146 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9148 intermediate_mode = insn_data[icode1].operand[0].mode;
9149 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9151 intermediate_type
9152 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9153 current_vector_size);
9154 if (intermediate_mode != TYPE_MODE (intermediate_type))
9155 return false;
9157 else
9158 intermediate_type
9159 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9160 interm_optab
9161 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9162 optab_default);
9163 if (!interm_optab
9164 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9165 || insn_data[icode1].operand[0].mode != intermediate_mode
9166 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9167 == CODE_FOR_nothing))
9168 break;
9170 interm_types->quick_push (intermediate_type);
9171 (*multi_step_cvt)++;
9173 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9174 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9175 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9176 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9178 prev_mode = intermediate_mode;
9179 prev_type = intermediate_type;
9180 optab1 = interm_optab;
9183 interm_types->release ();
9184 return false;