1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
57 tree
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
65 bool
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
71 struct loop* loop;
73 if (!loop_vinfo)
74 return false;
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
85 unsigned
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
90 if (body_cost_vec)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
95 misalign };
96 body_cost_vec->safe_push (si);
97 return (unsigned)
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
100 else
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple *new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple *new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple *pattern_stmt;
192 if (dump_enabled_p ())
194 dump_printf_loc (MSG_NOTE, vect_location,
195 "mark relevant %d, live %d: ", relevant, live_p);
196 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
199 /* If this stmt is an original stmt in a pattern, we might need to mark its
200 related pattern stmt instead of the original stmt. However, such stmts
201 may have their own uses that are not in any pattern, in such cases the
202 stmt itself should be marked. */
203 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
205 bool found = false;
206 if (!used_in_pattern)
208 imm_use_iterator imm_iter;
209 use_operand_p use_p;
210 gimple *use_stmt;
211 tree lhs;
212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
213 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
215 if (is_gimple_assign (stmt))
216 lhs = gimple_assign_lhs (stmt);
217 else
218 lhs = gimple_call_lhs (stmt);
 220         /* This use is outside the pattern; if LHS has other uses that are
221 pattern uses, we should mark the stmt itself, and not the pattern
222 stmt. */
223 if (lhs && TREE_CODE (lhs) == SSA_NAME)
224 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
226 if (is_gimple_debug (USE_STMT (use_p)))
227 continue;
228 use_stmt = USE_STMT (use_p);
230 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
231 continue;
233 if (vinfo_for_stmt (use_stmt)
234 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
236 found = true;
237 break;
242 if (!found)
244 /* This is the last stmt in a sequence that was detected as a
245 pattern that can potentially be vectorized. Don't mark the stmt
246 as relevant/live because it's not going to be vectorized.
247 Instead mark the pattern-stmt that replaces it. */
249 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
251 if (dump_enabled_p ())
252 dump_printf_loc (MSG_NOTE, vect_location,
253 "last stmt in pattern. don't mark"
254 " relevant/live.\n");
255 stmt_info = vinfo_for_stmt (pattern_stmt);
256 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
257 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
258 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
259 stmt = pattern_stmt;
263 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
264 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
265 STMT_VINFO_RELEVANT (stmt_info) = relevant;
267 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
268 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
270 if (dump_enabled_p ())
271 dump_printf_loc (MSG_NOTE, vect_location,
272 "already marked relevant/live.\n");
273 return;
276 worklist->safe_push (stmt);
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - control stmts in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 enum vect_relevant *relevant, bool *live_p)
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
302 *relevant = vect_unused_in_scope;
303 *live_p = false;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 return (*live_p || *relevant);
351 /* Function exist_non_indexing_operands_for_use_p
353 USE is one of the uses attached to STMT. Check if USE is
354 used in STMT for anything other than indexing an array. */
356 static bool
357 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
359 tree operand;
360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
362 /* USE corresponds to some operand in STMT. If there is no data
363 reference in STMT, then any operand that corresponds to USE
364 is not indexing an array. */
365 if (!STMT_VINFO_DATA_REF (stmt_info))
366 return true;
 368   /* STMT has a data_ref. FORNOW this means that it's of one of
369 the following forms:
370 -1- ARRAY_REF = var
371 -2- var = ARRAY_REF
372 (This should have been verified in analyze_data_refs).
374 'var' in the second case corresponds to a def, not a use,
375 so USE cannot correspond to any operands that are not used
376 for array indexing.
378 Therefore, all we need to check is if STMT falls into the
379 first case, and whether var corresponds to USE. */
381 if (!gimple_assign_copy_p (stmt))
383 if (is_gimple_call (stmt)
384 && gimple_call_internal_p (stmt))
385 switch (gimple_call_internal_fn (stmt))
387 case IFN_MASK_STORE:
388 operand = gimple_call_arg (stmt, 3);
389 if (operand == use)
390 return true;
391 /* FALLTHRU */
392 case IFN_MASK_LOAD:
393 operand = gimple_call_arg (stmt, 2);
394 if (operand == use)
395 return true;
396 break;
397 default:
398 break;
400 return false;
403 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
404 return false;
405 operand = gimple_assign_rhs1 (stmt);
406 if (TREE_CODE (operand) != SSA_NAME)
407 return false;
409 if (operand == use)
410 return true;
412 return false;
417 Function process_use.
419 Inputs:
420 - a USE in STMT in a loop represented by LOOP_VINFO
421 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
422 that defined USE. This is done by calling mark_relevant and passing it
423 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
424 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
425 be performed.
427 Outputs:
428 Generally, LIVE_P and RELEVANT are used to define the liveness and
429 relevance info of the DEF_STMT of this USE:
430 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
431 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
432 Exceptions:
433 - case 1: If USE is used only for address computations (e.g. array indexing),
434 which does not need to be directly vectorized, then the liveness/relevance
435 of the respective DEF_STMT is left unchanged.
436 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 437      skip DEF_STMT because it had already been processed.
438 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
439 be modified accordingly.
441 Return true if everything is as expected. Return false otherwise. */
443 static bool
444 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
445 enum vect_relevant relevant, vec<gimple *> *worklist,
446 bool force)
448 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
449 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
450 stmt_vec_info dstmt_vinfo;
451 basic_block bb, def_bb;
452 gimple *def_stmt;
453 enum vect_def_type dt;
455 /* case 1: we are only interested in uses that need to be vectorized. Uses
456 that are used for address computation are not considered relevant. */
457 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
458 return true;
460 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
462 if (dump_enabled_p ())
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464 "not vectorized: unsupported use in stmt.\n");
465 return false;
468 if (!def_stmt || gimple_nop_p (def_stmt))
469 return true;
471 def_bb = gimple_bb (def_stmt);
472 if (!flow_bb_inside_loop_p (loop, def_bb))
474 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
476 return true;
479 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
480 DEF_STMT must have already been processed, because this should be the
481 only way that STMT, which is a reduction-phi, was put in the worklist,
482 as there should be no other uses for DEF_STMT in the loop. So we just
483 check that everything is as expected, and we are done. */
484 dstmt_vinfo = vinfo_for_stmt (def_stmt);
485 bb = gimple_bb (stmt);
486 if (gimple_code (stmt) == GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
488 && gimple_code (def_stmt) != GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
490 && bb->loop_father == def_bb->loop_father)
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE, vect_location,
494 "reduc-stmt defining reduc-phi in the same nest.\n");
495 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
496 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
497 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
498 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
499 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
500 return true;
503 /* case 3a: outer-loop stmt defining an inner-loop stmt:
504 outer-loop-header-bb:
505 d = def_stmt
506 inner-loop:
507 stmt # use (d)
508 outer-loop-tail-bb:
509 ... */
510 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE, vect_location,
514 "outer-loop def-stmt defining inner-loop stmt.\n");
516 switch (relevant)
518 case vect_unused_in_scope:
519 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
520 vect_used_in_scope : vect_unused_in_scope;
521 break;
523 case vect_used_in_outer_by_reduction:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 relevant = vect_used_by_reduction;
526 break;
528 case vect_used_in_outer:
529 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
530 relevant = vect_used_in_scope;
531 break;
533 case vect_used_in_scope:
534 break;
536 default:
537 gcc_unreachable ();
541 /* case 3b: inner-loop stmt defining an outer-loop stmt:
542 outer-loop-header-bb:
544 inner-loop:
545 d = def_stmt
546 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
547 stmt # use (d) */
548 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE, vect_location,
552 "inner-loop def-stmt defining outer-loop stmt.\n");
554 switch (relevant)
556 case vect_unused_in_scope:
557 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
558 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
559 vect_used_in_outer_by_reduction : vect_unused_in_scope;
560 break;
562 case vect_used_by_reduction:
563 relevant = vect_used_in_outer_by_reduction;
564 break;
566 case vect_used_in_scope:
567 relevant = vect_used_in_outer;
568 break;
570 default:
571 gcc_unreachable ();
575 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
576 is_pattern_stmt_p (stmt_vinfo));
577 return true;
581 /* Function vect_mark_stmts_to_be_vectorized.
583 Not all stmts in the loop need to be vectorized. For example:
585 for i...
586 for j...
587 1. T0 = i + j
588 2. T1 = a[T0]
590 3. j = j + 1
 592    Stmts 1 and 3 do not need to be vectorized, because loop control and
593 addressing of vectorized data-refs are handled differently.
595 This pass detects such stmts. */
597 bool
598 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
601 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
602 unsigned int nbbs = loop->num_nodes;
603 gimple_stmt_iterator si;
604 gimple *stmt;
605 unsigned int i;
606 stmt_vec_info stmt_vinfo;
607 basic_block bb;
608 gimple *phi;
609 bool live_p;
610 enum vect_relevant relevant, tmp_relevant;
611 enum vect_def_type def_type;
613 if (dump_enabled_p ())
614 dump_printf_loc (MSG_NOTE, vect_location,
615 "=== vect_mark_stmts_to_be_vectorized ===\n");
617 auto_vec<gimple *, 64> worklist;
619 /* 1. Init worklist. */
620 for (i = 0; i < nbbs; i++)
622 bb = bbs[i];
623 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
625 phi = gsi_stmt (si);
626 if (dump_enabled_p ())
628 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
632 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
635 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
637 stmt = gsi_stmt (si);
638 if (dump_enabled_p ())
640 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
641 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
644 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt = worklist.pop ();
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant and live/dead according to the
664 liveness and relevance properties of STMT. */
665 stmt_vinfo = vinfo_for_stmt (stmt);
666 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
669 /* Generally, the liveness and relevance properties of STMT are
670 propagated as is to the DEF_STMTs of its USEs:
671 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
672 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
674 One exception is when STMT has been identified as defining a reduction
675 variable; in this case we set the liveness/relevance as follows:
676 live_p = false
677 relevant = vect_used_by_reduction
678 This is because we distinguish between two kinds of relevant stmts -
679 those that are used by a reduction computation, and those that are
680 (also) used by a regular computation. This allows us later on to
681 identify stmts that are used solely by a reduction, and therefore the
682 order of the results that they produce does not have to be kept. */
684 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
685 tmp_relevant = relevant;
686 switch (def_type)
688 case vect_reduction_def:
689 switch (tmp_relevant)
691 case vect_unused_in_scope:
692 relevant = vect_used_by_reduction;
693 break;
695 case vect_used_by_reduction:
696 if (gimple_code (stmt) == GIMPLE_PHI)
697 break;
698 /* fall through */
700 default:
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
703 "unsupported use of reduction.\n");
704 return false;
707 live_p = false;
708 break;
710 case vect_nested_cycle:
711 if (tmp_relevant != vect_unused_in_scope
712 && tmp_relevant != vect_used_in_outer_by_reduction
713 && tmp_relevant != vect_used_in_outer)
715 if (dump_enabled_p ())
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
717 "unsupported use of nested cycle.\n");
719 return false;
722 live_p = false;
723 break;
725 case vect_double_reduction_def:
726 if (tmp_relevant != vect_unused_in_scope
727 && tmp_relevant != vect_used_by_reduction)
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
731 "unsupported use of double reduction.\n");
733 return false;
736 live_p = false;
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (is_gimple_assign (stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
751 tree op = gimple_assign_rhs1 (stmt);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
757 live_p, relevant, &worklist, false)
758 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
759 live_p, relevant, &worklist, false))
760 return false;
761 i = 2;
763 for (; i < gimple_num_ops (stmt); i++)
765 op = gimple_op (stmt, i);
766 if (TREE_CODE (op) == SSA_NAME
767 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
768 &worklist, false))
769 return false;
772 else if (is_gimple_call (stmt))
774 for (i = 0; i < gimple_call_num_args (stmt); i++)
776 tree arg = gimple_call_arg (stmt, i);
777 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
778 &worklist, false))
779 return false;
783 else
784 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
786 tree op = USE_FROM_PTR (use_p);
787 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
788 &worklist, false))
789 return false;
792 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
794 tree off;
795 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
796 gcc_assert (decl);
797 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
798 &worklist, true))
799 return false;
801 } /* while worklist */
803 return true;
807 /* Function vect_model_simple_cost.
809 Models cost for simple operations, i.e. those that only emit ncopies of a
810 single op. Right now, this does not account for multiple insns that could
811 be generated for the single vector op. We will handle that shortly. */
813 void
814 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
815 enum vect_def_type *dt,
816 stmt_vector_for_cost *prologue_cost_vec,
817 stmt_vector_for_cost *body_cost_vec)
819 int i;
820 int inside_cost = 0, prologue_cost = 0;
822 /* The SLP costs were already calculated during SLP tree build. */
823 if (PURE_SLP_STMT (stmt_info))
824 return;
826 /* FORNOW: Assuming maximum 2 args per stmts. */
827 for (i = 0; i < 2; i++)
828 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
829 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
830 stmt_info, 0, vect_prologue);
832 /* Pass the inside-of-loop statements to the target-specific cost model. */
833 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
834 stmt_info, 0, vect_body);
836 if (dump_enabled_p ())
837 dump_printf_loc (MSG_NOTE, vect_location,
838 "vect_model_simple_cost: inside_cost = %d, "
839 "prologue_cost = %d .\n", inside_cost, prologue_cost);
843 /* Model cost for type demotion and promotion operations. PWR is normally
844 zero for single-step promotions and demotions. It will be one if
845 two-step promotion/demotion is required, and so on. Each additional
 846    step doubles the number of instructions required.  */
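/* E.g., with PWR == 0 the loop below costs a single-step promotion as
   vect_pow2 (1) == 2 vec_promote_demote stmts and a single-step demotion
   as vect_pow2 (0) == 1; with PWR == 1 the totals become 2 + 4 and
   1 + 2 respectively.  */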
848 static void
849 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
850 enum vect_def_type *dt, int pwr)
852 int i, tmp;
853 int inside_cost = 0, prologue_cost = 0;
854 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
855 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
856 void *target_cost_data;
858 /* The SLP costs were already calculated during SLP tree build. */
859 if (PURE_SLP_STMT (stmt_info))
860 return;
862 if (loop_vinfo)
863 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
864 else
865 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
867 for (i = 0; i < pwr + 1; i++)
869 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
870 (i + 1) : i;
871 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
872 vec_promote_demote, stmt_info, 0,
873 vect_body);
876 /* FORNOW: Assuming maximum 2 args per stmts. */
877 for (i = 0; i < 2; i++)
878 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
879 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
880 stmt_info, 0, vect_prologue);
882 if (dump_enabled_p ())
883 dump_printf_loc (MSG_NOTE, vect_location,
884 "vect_model_promotion_demotion_cost: inside_cost = %d, "
885 "prologue_cost = %d .\n", inside_cost, prologue_cost);
888 /* Function vect_cost_group_size
890 For grouped load or store, return the group_size only if it is the first
891 load or store of a group, else return 1. This ensures that group size is
892 only returned once per group. */
894 static int
895 vect_cost_group_size (stmt_vec_info stmt_info)
897 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
899 if (first_stmt == STMT_VINFO_STMT (stmt_info))
900 return GROUP_SIZE (stmt_info);
902 return 1;
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
911 void
912 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
913 bool store_lanes_p, enum vect_def_type dt,
914 slp_tree slp_node,
915 stmt_vector_for_cost *prologue_cost_vec,
916 stmt_vector_for_cost *body_cost_vec)
918 int group_size;
919 unsigned int inside_cost = 0, prologue_cost = 0;
920 struct data_reference *first_dr;
921 gimple *first_stmt;
923 if (dt == vect_constant_def || dt == vect_external_def)
924 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
925 stmt_info, 0, vect_prologue);
927 /* Grouped access? */
928 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
930 if (slp_node)
932 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
933 group_size = 1;
935 else
937 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938 group_size = vect_cost_group_size (stmt_info);
941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
943 /* Not a grouped access. */
944 else
946 group_size = 1;
947 first_dr = STMT_VINFO_DATA_REF (stmt_info);
950 /* We assume that the cost of a single store-lanes instruction is
951 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
952 access is instead being provided by a permute-and-store operation,
953 include the cost of the permutes. */
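/* E.g., a group of 4 vector stores is costed below as
   ncopies * ceil_log2 (4) * 4 == ncopies * 8 vec_perm stmts.  */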
954 if (!store_lanes_p && group_size > 1
955 && !STMT_VINFO_STRIDED_P (stmt_info))
 957       /* Uses high and low interleave or shuffle operations for each
958 needed permute. */
959 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
960 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
961 stmt_info, 0, vect_body);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "vect_model_store_cost: strided group_size = %d .\n",
966 group_size);
969 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
970 /* Costs of the stores. */
971 if (STMT_VINFO_STRIDED_P (stmt_info)
972 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
974 /* N scalar stores plus extracting the elements. */
975 inside_cost += record_stmt_cost (body_cost_vec,
976 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
977 scalar_store, stmt_info, 0, vect_body);
979 else
980 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
982 if (STMT_VINFO_STRIDED_P (stmt_info))
983 inside_cost += record_stmt_cost (body_cost_vec,
984 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
985 vec_to_scalar, stmt_info, 0, vect_body);
987 if (dump_enabled_p ())
988 dump_printf_loc (MSG_NOTE, vect_location,
989 "vect_model_store_cost: inside_cost = %d, "
990 "prologue_cost = %d .\n", inside_cost, prologue_cost);
994 /* Calculate cost of DR's memory access. */
995 void
996 vect_get_store_cost (struct data_reference *dr, int ncopies,
997 unsigned int *inside_cost,
998 stmt_vector_for_cost *body_cost_vec)
1000 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1001 gimple *stmt = DR_STMT (dr);
1002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1004 switch (alignment_support_scheme)
1006 case dr_aligned:
1008 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 vector_store, stmt_info, 0,
1010 vect_body);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE, vect_location,
1014 "vect_model_store_cost: aligned.\n");
1015 break;
1018 case dr_unaligned_supported:
1020 /* Here, we assign an additional cost for the unaligned store. */
1021 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1022 unaligned_store, stmt_info,
1023 DR_MISALIGNMENT (dr), vect_body);
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_NOTE, vect_location,
1026 "vect_model_store_cost: unaligned supported by "
1027 "hardware.\n");
1028 break;
1031 case dr_unaligned_unsupported:
1033 *inside_cost = VECT_MAX_COST;
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1037 "vect_model_store_cost: unsupported access.\n");
1038 break;
1041 default:
1042 gcc_unreachable ();
1047 /* Function vect_model_load_cost
1049 Models cost for loads. In the case of grouped accesses, the last access
1050 has the overhead of the grouped access attributed to it. Since unaligned
1051 accesses are supported for loads, we also account for the costs of the
1052 access scheme chosen. */
1054 void
1055 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1056 bool load_lanes_p, slp_tree slp_node,
1057 stmt_vector_for_cost *prologue_cost_vec,
1058 stmt_vector_for_cost *body_cost_vec)
1060 int group_size;
1061 gimple *first_stmt;
1062 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1063 unsigned int inside_cost = 0, prologue_cost = 0;
1065 /* Grouped accesses? */
1066 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1067 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1069 group_size = vect_cost_group_size (stmt_info);
1070 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1072 /* Not a grouped access. */
1073 else
1075 group_size = 1;
1076 first_dr = dr;
1079 /* We assume that the cost of a single load-lanes instruction is
1080 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1081 access is instead being provided by a load-and-permute operation,
1082 include the cost of the permutes. */
1083 if (!load_lanes_p && group_size > 1
1084 && !STMT_VINFO_STRIDED_P (stmt_info))
 1086       /* Uses even and odd extract operations or shuffle operations
1087 for each needed permute. */
1088 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1089 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: strided group_size = %d .\n",
1095 group_size);
1098 /* The loads themselves. */
1099 if (STMT_VINFO_STRIDED_P (stmt_info)
1100 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1102 /* N scalar loads plus gathering them into a vector. */
1103 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1104 inside_cost += record_stmt_cost (body_cost_vec,
1105 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1106 scalar_load, stmt_info, 0, vect_body);
1108 else
1109 vect_get_load_cost (first_dr, ncopies,
1110 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1111 || group_size > 1 || slp_node),
1112 &inside_cost, &prologue_cost,
1113 prologue_cost_vec, body_cost_vec, true);
1114 if (STMT_VINFO_STRIDED_P (stmt_info))
1115 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1116 stmt_info, 0, vect_body);
1118 if (dump_enabled_p ())
1119 dump_printf_loc (MSG_NOTE, vect_location,
1120 "vect_model_load_cost: inside_cost = %d, "
1121 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1125 /* Calculate cost of DR's memory access. */
1126 void
1127 vect_get_load_cost (struct data_reference *dr, int ncopies,
1128 bool add_realign_cost, unsigned int *inside_cost,
1129 unsigned int *prologue_cost,
1130 stmt_vector_for_cost *prologue_cost_vec,
1131 stmt_vector_for_cost *body_cost_vec,
1132 bool record_prologue_costs)
1134 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1135 gimple *stmt = DR_STMT (dr);
1136 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1138 switch (alignment_support_scheme)
1140 case dr_aligned:
1142 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1143 stmt_info, 0, vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: aligned.\n");
1149 break;
1151 case dr_unaligned_supported:
1153 /* Here, we assign an additional cost for the unaligned load. */
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1155 unaligned_load, stmt_info,
1156 DR_MISALIGNMENT (dr), vect_body);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "vect_model_load_cost: unaligned supported by "
1161 "hardware.\n");
1163 break;
1165 case dr_explicit_realign:
1167 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1168 vector_load, stmt_info, 0, vect_body);
1169 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1170 vec_perm, stmt_info, 0, vect_body);
1172 /* FIXME: If the misalignment remains fixed across the iterations of
1173 the containing loop, the following cost should be added to the
1174 prologue costs. */
1175 if (targetm.vectorize.builtin_mask_for_load)
1176 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1177 stmt_info, 0, vect_body);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_model_load_cost: explicit realign\n");
1183 break;
1185 case dr_explicit_realign_optimized:
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: unaligned software "
1190 "pipelined.\n");
1192 /* Unaligned software pipeline has a load of an address, an initial
1193 load, and possibly a mask operation to "prime" the loop. However,
1194 if this is an access in a group of loads, which provide grouped
1195 access, then the above cost should only be considered for one
1196 access in the group. Inside the loop, there is a load op
1197 and a realignment op. */
1199 if (add_realign_cost && record_prologue_costs)
1201 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1202 vector_stmt, stmt_info,
1203 0, vect_prologue);
1204 if (targetm.vectorize.builtin_mask_for_load)
1205 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1206 vector_stmt, stmt_info,
1207 0, vect_prologue);
1210 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1211 stmt_info, 0, vect_body);
1212 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1213 stmt_info, 0, vect_body);
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE, vect_location,
1217 "vect_model_load_cost: explicit realign optimized"
1218 "\n");
1220 break;
1223 case dr_unaligned_unsupported:
1225 *inside_cost = VECT_MAX_COST;
1227 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1229 "vect_model_load_cost: unsupported access.\n");
1230 break;
1233 default:
1234 gcc_unreachable ();
1238 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1239 the loop preheader for the vectorized stmt STMT. */
1241 static void
1242 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1244 if (gsi)
1245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1246 else
1248 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1249 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1251 if (loop_vinfo)
1253 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1254 basic_block new_bb;
1255 edge pe;
1257 if (nested_in_vect_loop_p (loop, stmt))
1258 loop = loop->inner;
1260 pe = loop_preheader_edge (loop);
1261 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1262 gcc_assert (!new_bb);
1264 else
1266 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1267 basic_block bb;
1268 gimple_stmt_iterator gsi_bb_start;
1270 gcc_assert (bb_vinfo);
1271 bb = BB_VINFO_BB (bb_vinfo);
1272 gsi_bb_start = gsi_after_labels (bb);
1273 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1277 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE, vect_location,
1280 "created new init_stmt: ");
1281 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1285 /* Function vect_init_vector.
1287 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1288 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1289    vector type, a vector with all elements equal to VAL is created first.
1290 Place the initialization at BSI if it is not NULL. Otherwise, place the
1291 initialization at the loop preheader.
1292 Return the DEF of INIT_STMT.
1293 It will be used in the vectorization of STMT. */
1295 tree
1296 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1298 gimple *init_stmt;
1299 tree new_temp;
1301 if (TREE_CODE (type) == VECTOR_TYPE
1302 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1304 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
 1306 	  /* A scalar boolean value should be transformed into
 1307 	     an all-zeros or all-ones value before building a vector.  */
1308 if (VECTOR_BOOLEAN_TYPE_P (type))
1310 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1311 tree false_val = build_zero_cst (TREE_TYPE (type));
1313 if (CONSTANT_CLASS_P (val))
1314 val = integer_zerop (val) ? false_val : true_val;
1315 else
1317 new_temp = make_ssa_name (TREE_TYPE (type));
1318 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1319 val, true_val, false_val);
1320 vect_init_vector_1 (stmt, init_stmt, gsi);
1321 val = new_temp;
1324 else if (CONSTANT_CLASS_P (val))
1325 val = fold_convert (TREE_TYPE (type), val);
1326 else
1328 new_temp = make_ssa_name (TREE_TYPE (type));
1329 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1330 vect_init_vector_1 (stmt, init_stmt, gsi);
1331 val = new_temp;
1334 val = build_vector_from_val (type, val);
1337 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1338 init_stmt = gimple_build_assign (new_temp, val);
1339 vect_init_vector_1 (stmt, init_stmt, gsi);
1340 return new_temp;
1344 /* Function vect_get_vec_def_for_operand.
1346 OP is an operand in STMT. This function returns a (vector) def that will be
1347 used in the vectorized stmt for STMT.
1349 In the case that OP is an SSA_NAME which is defined in the loop, then
1350 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1352 In case OP is an invariant or constant, a new stmt that creates a vector def
1353 needs to be introduced. VECTYPE may be used to specify a required type for
1354 vector invariant. */
1356 tree
1357 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1359 tree vec_oprnd;
1360 gimple *vec_stmt;
1361 gimple *def_stmt;
1362 stmt_vec_info def_stmt_info = NULL;
1363 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1364 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1366 enum vect_def_type dt;
1367 bool is_simple_use;
1368 tree vector_type;
1370 if (dump_enabled_p ())
1372 dump_printf_loc (MSG_NOTE, vect_location,
1373 "vect_get_vec_def_for_operand: ");
1374 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1375 dump_printf (MSG_NOTE, "\n");
1378 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1379 gcc_assert (is_simple_use);
1380 if (dump_enabled_p ())
1382 int loc_printed = 0;
1383 if (def_stmt)
1385 if (loc_printed)
1386 dump_printf (MSG_NOTE, " def_stmt = ");
1387 else
1388 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1389 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1393 switch (dt)
1395 /* operand is a constant or a loop invariant. */
1396 case vect_constant_def:
1397 case vect_external_def:
1399 if (vectype)
1400 vector_type = vectype;
1401 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1402 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1403 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1404 else
1405 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1407 gcc_assert (vector_type);
1408 return vect_init_vector (stmt, op, vector_type, NULL);
1411 /* operand is defined inside the loop. */
1412 case vect_internal_def:
1414 /* Get the def from the vectorized stmt. */
1415 def_stmt_info = vinfo_for_stmt (def_stmt);
1417 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1418 /* Get vectorized pattern statement. */
1419 if (!vec_stmt
1420 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1421 && !STMT_VINFO_RELEVANT (def_stmt_info))
1422 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1423 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1424 gcc_assert (vec_stmt);
1425 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1426 vec_oprnd = PHI_RESULT (vec_stmt);
1427 else if (is_gimple_call (vec_stmt))
1428 vec_oprnd = gimple_call_lhs (vec_stmt);
1429 else
1430 vec_oprnd = gimple_assign_lhs (vec_stmt);
1431 return vec_oprnd;
1434 /* operand is defined by a loop header phi - reduction */
1435 case vect_reduction_def:
1436 case vect_double_reduction_def:
1437 case vect_nested_cycle:
1438 /* Code should use get_initial_def_for_reduction. */
1439 gcc_unreachable ();
1441 /* operand is defined by loop-header phi - induction. */
1442 case vect_induction_def:
1444 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1446 /* Get the def from the vectorized stmt. */
1447 def_stmt_info = vinfo_for_stmt (def_stmt);
1448 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1449 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1450 vec_oprnd = PHI_RESULT (vec_stmt);
1451 else
1452 vec_oprnd = gimple_get_lhs (vec_stmt);
1453 return vec_oprnd;
1456 default:
1457 gcc_unreachable ();
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 static void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1.
1564 REDUC_INDEX is the index of reduction operand in case of reduction,
1565 and -1 otherwise. */
1567 void
1568 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1569 vec<tree> *vec_oprnds0,
1570 vec<tree> *vec_oprnds1,
1571 slp_tree slp_node, int reduc_index)
1573 if (slp_node)
1575 int nops = (op1 == NULL_TREE) ? 1 : 2;
1576 auto_vec<tree> ops (nops);
1577 auto_vec<vec<tree> > vec_defs (nops);
1579 ops.quick_push (op0);
1580 if (op1)
1581 ops.quick_push (op1);
1583 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1585 *vec_oprnds0 = vec_defs[0];
1586 if (op1)
1587 *vec_oprnds1 = vec_defs[1];
1589 else
1591 tree vec_oprnd;
1593 vec_oprnds0->create (1);
1594 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1595 vec_oprnds0->quick_push (vec_oprnd);
1597 if (op1)
1599 vec_oprnds1->create (1);
1600 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1601 vec_oprnds1->quick_push (vec_oprnd);
1607 /* Function vect_finish_stmt_generation.
1609 Insert a new stmt. */
1611 void
1612 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1613 gimple_stmt_iterator *gsi)
1615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1616 vec_info *vinfo = stmt_info->vinfo;
1618 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1620 if (!gsi_end_p (*gsi)
1621 && gimple_has_mem_ops (vec_stmt))
1623 gimple *at_stmt = gsi_stmt (*gsi);
1624 tree vuse = gimple_vuse (at_stmt);
1625 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1627 tree vdef = gimple_vdef (at_stmt);
1628 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1629 /* If we have an SSA vuse and insert a store, update virtual
1630 SSA form to avoid triggering the renamer. Do so only
1631 if we can easily see all uses - which is what almost always
1632 happens with the way vectorized stmts are inserted. */
1633 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1634 && ((is_gimple_assign (vec_stmt)
1635 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1636 || (is_gimple_call (vec_stmt)
1637 && !(gimple_call_flags (vec_stmt)
1638 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1640 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1641 gimple_set_vdef (vec_stmt, new_vdef);
1642 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1646 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1648 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1650 if (dump_enabled_p ())
1652 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1656 gimple_set_location (vec_stmt, gimple_location (stmt));
1658 /* While EH edges will generally prevent vectorization, stmt might
1659 e.g. be in a must-not-throw region. Ensure newly created stmts
1660 that could throw are part of the same region. */
1661 int lp_nr = lookup_stmt_eh_lp (stmt);
1662 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1663 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1666 /* We want to vectorize a call to combined function CFN with function
1667 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1668 as the types of all inputs. Check whether this is possible using
1669 an internal function, returning its code if so or IFN_LAST if not. */
1671 static internal_fn
1672 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1673 tree vectype_out, tree vectype_in)
1675 internal_fn ifn;
1676 if (internal_fn_p (cfn))
1677 ifn = as_internal_fn (cfn);
1678 else
1679 ifn = associated_internal_fn (fndecl);
1680 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1682 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1683 if (info.vectorizable)
1685 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1686 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1687 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1688 OPTIMIZE_FOR_SPEED))
1689 return ifn;
1692 return IFN_LAST;
1696 static tree permute_vec_elements (tree, tree, tree, gimple *,
1697 gimple_stmt_iterator *);
1700 /* Function vectorizable_mask_load_store.
1702 Check if STMT performs a conditional load or store that can be vectorized.
1703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1704 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1705 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1707 static bool
1708 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1709 gimple **vec_stmt, slp_tree slp_node)
1711 tree vec_dest = NULL;
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1713 stmt_vec_info prev_stmt_info;
1714 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1715 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1716 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1717 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1718 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1719 tree rhs_vectype = NULL_TREE;
1720 tree mask_vectype;
1721 tree elem_type;
1722 gimple *new_stmt;
1723 tree dummy;
1724 tree dataref_ptr = NULL_TREE;
1725 gimple *ptr_incr;
1726 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1727 int ncopies;
1728 int i, j;
1729 bool inv_p;
1730 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1731 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1732 int gather_scale = 1;
1733 enum vect_def_type gather_dt = vect_unknown_def_type;
1734 bool is_store;
1735 tree mask;
1736 gimple *def_stmt;
1737 enum vect_def_type dt;
1739 if (slp_node != NULL)
1740 return false;
1742 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1743 gcc_assert (ncopies >= 1);
1745 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1746 mask = gimple_call_arg (stmt, 2);
1748 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1749 return false;
1751 /* FORNOW. This restriction should be relaxed. */
1752 if (nested_in_vect_loop && ncopies > 1)
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1756 "multiple types in nested loop.");
1757 return false;
1760 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1761 return false;
1763 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1764 && ! vec_stmt)
1765 return false;
1767 if (!STMT_VINFO_DATA_REF (stmt_info))
1768 return false;
1770 elem_type = TREE_TYPE (vectype);
1772 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1773 return false;
1775 if (STMT_VINFO_STRIDED_P (stmt_info))
1776 return false;
1778 if (TREE_CODE (mask) != SSA_NAME)
1779 return false;
1781 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1782 return false;
1784 if (!mask_vectype)
1785 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1787 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
1788 return false;
1790 if (is_store)
1792 tree rhs = gimple_call_arg (stmt, 3);
1793 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1794 return false;
1797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1799 gimple *def_stmt;
1800 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1801 &gather_off, &gather_scale);
1802 gcc_assert (gather_decl);
1803 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1804 &gather_off_vectype))
1806 if (dump_enabled_p ())
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "gather index use not simple.");
1809 return false;
1812 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1813 tree masktype
1814 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1815 if (TREE_CODE (masktype) == INTEGER_TYPE)
1817 if (dump_enabled_p ())
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1819 "masked gather with integer mask not supported.");
1820 return false;
1823 else if (tree_int_cst_compare (nested_in_vect_loop
1824 ? STMT_VINFO_DR_STEP (stmt_info)
1825 : DR_STEP (dr), size_zero_node) <= 0)
1826 return false;
1827 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1828 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1829 TYPE_MODE (mask_vectype),
1830 !is_store)
1831 || (rhs_vectype
1832 && !useless_type_conversion_p (vectype, rhs_vectype)))
1833 return false;
1835 if (!vec_stmt) /* transformation not required. */
1837 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1838 if (is_store)
1839 vect_model_store_cost (stmt_info, ncopies, false, dt,
1840 NULL, NULL, NULL);
1841 else
1842 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1843 return true;
1846 /** Transform. **/
1848 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1850 tree vec_oprnd0 = NULL_TREE, op;
1851 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1853 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1854 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1855 tree mask_perm_mask = NULL_TREE;
1856 edge pe = loop_preheader_edge (loop);
1857 gimple_seq seq;
1858 basic_block new_bb;
1859 enum { NARROW, NONE, WIDEN } modifier;
1860 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1862 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1863 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1864 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1865 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1866 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1867 scaletype = TREE_VALUE (arglist);
1868 gcc_checking_assert (types_compatible_p (srctype, rettype)
1869 && types_compatible_p (srctype, masktype));
1871 if (nunits == gather_off_nunits)
1872 modifier = NONE;
1873 else if (nunits == gather_off_nunits / 2)
1875 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1876 modifier = WIDEN;
1878 for (i = 0; i < gather_off_nunits; ++i)
1879 sel[i] = i | nunits;
1881 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1883 else if (nunits == gather_off_nunits * 2)
1885 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1886 modifier = NARROW;
1888 for (i = 0; i < nunits; ++i)
1889 sel[i] = i < gather_off_nunits
1890 ? i : i + nunits - gather_off_nunits;
1892 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1893 ncopies *= 2;
1894 for (i = 0; i < nunits; ++i)
1895 sel[i] = i | gather_off_nunits;
1896 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1898 else
1899 gcc_unreachable ();
1901 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1903 ptr = fold_convert (ptrtype, gather_base);
1904 if (!is_gimple_min_invariant (ptr))
1906 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1907 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1908 gcc_assert (!new_bb);
1911 scale = build_int_cst (scaletype, gather_scale);
1913 prev_stmt_info = NULL;
1914 for (j = 0; j < ncopies; ++j)
1916 if (modifier == WIDEN && (j & 1))
1917 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1918 perm_mask, stmt, gsi);
1919 else if (j == 0)
1920 op = vec_oprnd0
1921 = vect_get_vec_def_for_operand (gather_off, stmt);
1922 else
1923 op = vec_oprnd0
1924 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1926 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1928 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1929 == TYPE_VECTOR_SUBPARTS (idxtype));
1930 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1931 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1932 new_stmt
1933 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1935 op = var;
1938 if (mask_perm_mask && (j & 1))
1939 mask_op = permute_vec_elements (mask_op, mask_op,
1940 mask_perm_mask, stmt, gsi);
1941 else
1943 if (j == 0)
1944 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1945 else
1947 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1948 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1951 mask_op = vec_mask;
1952 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1955 == TYPE_VECTOR_SUBPARTS (masktype));
1956 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1957 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1958 new_stmt
1959 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1961 mask_op = var;
1965 new_stmt
1966 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1967 scale);
1969 if (!useless_type_conversion_p (vectype, rettype))
1971 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1972 == TYPE_VECTOR_SUBPARTS (rettype));
1973 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1974 gimple_call_set_lhs (new_stmt, op);
1975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1976 var = make_ssa_name (vec_dest);
1977 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1978 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1980 else
1982 var = make_ssa_name (vec_dest, new_stmt);
1983 gimple_call_set_lhs (new_stmt, var);
1986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1988 if (modifier == NARROW)
1990 if ((j & 1) == 0)
1992 prev_res = var;
1993 continue;
1995 var = permute_vec_elements (prev_res, var,
1996 perm_mask, stmt, gsi);
1997 new_stmt = SSA_NAME_DEF_STMT (var);
2000 if (prev_stmt_info == NULL)
2001 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2002 else
2003 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2004 prev_stmt_info = vinfo_for_stmt (new_stmt);
2007 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2008 from the IL. */
2009 if (STMT_VINFO_RELATED_STMT (stmt_info))
2011 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2012 stmt_info = vinfo_for_stmt (stmt);
2014 tree lhs = gimple_call_lhs (stmt);
2015 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2016 set_vinfo_for_stmt (new_stmt, stmt_info);
2017 set_vinfo_for_stmt (stmt, NULL);
2018 STMT_VINFO_STMT (stmt_info) = new_stmt;
2019 gsi_replace (gsi, new_stmt, true);
2020 return true;
2022 else if (is_store)
2024 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2025 prev_stmt_info = NULL;
2026 for (i = 0; i < ncopies; i++)
2028 unsigned align, misalign;
2030 if (i == 0)
2032 tree rhs = gimple_call_arg (stmt, 3);
2033 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2034 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2035 /* We should have caught mismatched types earlier. */
2036 gcc_assert (useless_type_conversion_p (vectype,
2037 TREE_TYPE (vec_rhs)));
2038 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2039 NULL_TREE, &dummy, gsi,
2040 &ptr_incr, false, &inv_p);
2041 gcc_assert (!inv_p);
2043 else
2045 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2046 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2047 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2048 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2049 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2050 TYPE_SIZE_UNIT (vectype));
2053 align = TYPE_ALIGN_UNIT (vectype);
2054 if (aligned_access_p (dr))
2055 misalign = 0;
2056 else if (DR_MISALIGNMENT (dr) == -1)
2058 align = TYPE_ALIGN_UNIT (elem_type);
2059 misalign = 0;
2061 else
2062 misalign = DR_MISALIGNMENT (dr);
2063 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2064 misalign);
2065 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2066 misalign ? misalign & -misalign : align);
2067 new_stmt
2068 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2069 ptr, vec_mask, vec_rhs);
2070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2071 if (i == 0)
2072 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2073 else
2074 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2075 prev_stmt_info = vinfo_for_stmt (new_stmt);
2078 else
2080 tree vec_mask = NULL_TREE;
2081 prev_stmt_info = NULL;
2082 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2083 for (i = 0; i < ncopies; i++)
2085 unsigned align, misalign;
2087 if (i == 0)
2089 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2090 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2091 NULL_TREE, &dummy, gsi,
2092 &ptr_incr, false, &inv_p);
2093 gcc_assert (!inv_p);
2095 else
2097 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2098 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2099 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2100 TYPE_SIZE_UNIT (vectype));
2103 align = TYPE_ALIGN_UNIT (vectype);
2104 if (aligned_access_p (dr))
2105 misalign = 0;
2106 else if (DR_MISALIGNMENT (dr) == -1)
2108 align = TYPE_ALIGN_UNIT (elem_type);
2109 misalign = 0;
2111 else
2112 misalign = DR_MISALIGNMENT (dr);
2113 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2114 misalign);
2115 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2116 misalign ? misalign & -misalign : align);
2117 new_stmt
2118 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2119 ptr, vec_mask);
2120 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122 if (i == 0)
2123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124 else
2125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126 prev_stmt_info = vinfo_for_stmt (new_stmt);
2130 if (!is_store)
2132 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2133 from the IL. */
2134 if (STMT_VINFO_RELATED_STMT (stmt_info))
2136 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2137 stmt_info = vinfo_for_stmt (stmt);
2139 tree lhs = gimple_call_lhs (stmt);
2140 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2141 set_vinfo_for_stmt (new_stmt, stmt_info);
2142 set_vinfo_for_stmt (stmt, NULL);
2143 STMT_VINFO_STMT (stmt_info) = new_stmt;
2144 gsi_replace (gsi, new_stmt, true);
2147 return true;
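/* Illustrative sketch of the kind of scalar loop that if-conversion rewrites
   into MASK_LOAD / MASK_STORE internal calls, which the code above then turns
   into masked vector memory accesses.  The function name is made up for the
   example; a target with masked loads/stores and -O3 are assumed.  */

void
example_masked_loop (int *__restrict out, const int *__restrict in,
                     const int *__restrict cond, int n)
{
  int i;
  for (i = 0; i < n; i++)
    if (cond[i])                /* becomes the mask operand */
      out[i] = in[i] + 1;       /* load and store only where the mask is set */
}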
2150 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2151 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2152 in a single step. On success, store the binary pack code in
2153 *CONVERT_CODE. */
2155 static bool
2156 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2157 tree_code *convert_code)
2159 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2160 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2161 return false;
2163 tree_code code;
2164 int multi_step_cvt = 0;
2165 auto_vec <tree, 8> interm_types;
2166 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2167 &code, &multi_step_cvt,
2168 &interm_types)
2169 || multi_step_cvt)
2170 return false;
2172 *convert_code = code;
2173 return true;
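/* Conceptual sketch of the single-step narrowing checked above: a
   VEC_PACK_TRUNC-style operation truncates two vectors of N wide elements
   into one vector of 2*N narrower elements.  Plain C illustration only;
   the element order of the packed result is target-dependent.  */

static void
example_pack_trunc (short *__restrict dst, const int *__restrict lo,
                    const int *__restrict hi, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] = (short) lo[i];     /* truncated elements of the first vector */
  for (i = 0; i < n; i++)
    dst[n + i] = (short) hi[i]; /* followed by those of the second vector */
}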
2176 /* Function vectorizable_call.
2178 Check if GS performs a function call that can be vectorized.
2179 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2180 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2181 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2183 static bool
2184 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2185 slp_tree slp_node)
2187 gcall *stmt;
2188 tree vec_dest;
2189 tree scalar_dest;
2190 tree op, type;
2191 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2192 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2193 tree vectype_out, vectype_in;
2194 int nunits_in;
2195 int nunits_out;
2196 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2197 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2198 vec_info *vinfo = stmt_info->vinfo;
2199 tree fndecl, new_temp, rhs_type;
2200 gimple *def_stmt;
2201 enum vect_def_type dt[3]
2202 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2203 gimple *new_stmt = NULL;
2204 int ncopies, j;
2205 vec<tree> vargs = vNULL;
2206 enum { NARROW, NONE, WIDEN } modifier;
2207 size_t i, nargs;
2208 tree lhs;
2210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2211 return false;
2213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2214 && ! vec_stmt)
2215 return false;
2217 /* Is GS a vectorizable call? */
2218 stmt = dyn_cast <gcall *> (gs);
2219 if (!stmt)
2220 return false;
2222 if (gimple_call_internal_p (stmt)
2223 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2224 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2225 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2226 slp_node);
2228 if (gimple_call_lhs (stmt) == NULL_TREE
2229 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2230 return false;
2232 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2234 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2236 /* Process function arguments. */
2237 rhs_type = NULL_TREE;
2238 vectype_in = NULL_TREE;
2239 nargs = gimple_call_num_args (stmt);
2241 /* Bail out if the function has more than three arguments; we do not have
2242 interesting builtin functions to vectorize with more than two arguments
2243 except for fma. A call with no arguments is not handled either. */
2244 if (nargs == 0 || nargs > 3)
2245 return false;
2247 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2248 if (gimple_call_internal_p (stmt)
2249 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2251 nargs = 0;
2252 rhs_type = unsigned_type_node;
2255 for (i = 0; i < nargs; i++)
2257 tree opvectype;
2259 op = gimple_call_arg (stmt, i);
2261 /* We can only handle calls with arguments of the same type. */
2262 if (rhs_type
2263 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2267 "argument types differ.\n");
2268 return false;
2270 if (!rhs_type)
2271 rhs_type = TREE_TYPE (op);
2273 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2275 if (dump_enabled_p ())
2276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2277 "use not simple.\n");
2278 return false;
2281 if (!vectype_in)
2282 vectype_in = opvectype;
2283 else if (opvectype
2284 && opvectype != vectype_in)
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2288 "argument vector types differ.\n");
2289 return false;
2292 /* If all arguments are external or constant defs, use a vector type with
2293 the same size as the output vector type. */
2294 if (!vectype_in)
2295 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2296 if (vec_stmt)
2297 gcc_assert (vectype_in);
2298 if (!vectype_in)
2300 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "no vectype for scalar type ");
2304 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2305 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2308 return false;
2311 /* FORNOW */
2312 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2313 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2314 if (nunits_in == nunits_out / 2)
2315 modifier = NARROW;
2316 else if (nunits_out == nunits_in)
2317 modifier = NONE;
2318 else if (nunits_out == nunits_in / 2)
2319 modifier = WIDEN;
2320 else
2321 return false;
2323 /* We only handle functions that do not read or clobber memory. */
2324 if (gimple_vuse (stmt))
2326 if (dump_enabled_p ())
2327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2328 "function reads from or writes to memory.\n");
2329 return false;
2332 /* For now, we only vectorize functions if a target specific builtin
2333 is available. TODO -- in some cases, it might be profitable to
2334 insert the calls for pieces of the vector, in order to be able
2335 to vectorize other operations in the loop. */
2336 fndecl = NULL_TREE;
2337 internal_fn ifn = IFN_LAST;
2338 combined_fn cfn = gimple_call_combined_fn (stmt);
2339 tree callee = gimple_call_fndecl (stmt);
2341 /* First try using an internal function. */
2342 tree_code convert_code = ERROR_MARK;
2343 if (cfn != CFN_LAST
2344 && (modifier == NONE
2345 || (modifier == NARROW
2346 && simple_integer_narrowing (vectype_out, vectype_in,
2347 &convert_code))))
2348 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2349 vectype_in);
2351 /* If that fails, try asking for a target-specific built-in function. */
2352 if (ifn == IFN_LAST)
2354 if (cfn != CFN_LAST)
2355 fndecl = targetm.vectorize.builtin_vectorized_function
2356 (cfn, vectype_out, vectype_in);
2357 else
2358 fndecl = targetm.vectorize.builtin_md_vectorized_function
2359 (callee, vectype_out, vectype_in);
2362 if (ifn == IFN_LAST && !fndecl)
2364 if (cfn == CFN_GOMP_SIMD_LANE
2365 && !slp_node
2366 && loop_vinfo
2367 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2368 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2369 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2370 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2372 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2373 { 0, 1, 2, ... vf - 1 } vector. */
2374 gcc_assert (nargs == 0);
2376 else
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "function is not vectorizable.\n");
2381 return false;
2385 if (slp_node || PURE_SLP_STMT (stmt_info))
2386 ncopies = 1;
2387 else if (modifier == NARROW && ifn == IFN_LAST)
2388 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2389 else
2390 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2392 /* Sanity check: make sure that at least one copy of the vectorized stmt
2393 needs to be generated. */
2394 gcc_assert (ncopies >= 1);
2396 if (!vec_stmt) /* transformation not required. */
2398 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2401 "\n");
2402 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2403 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2404 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2405 vec_promote_demote, stmt_info, 0, vect_body);
2407 return true;
2410 /** Transform. **/
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2415 /* Handle def. */
2416 scalar_dest = gimple_call_lhs (stmt);
2417 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2419 prev_stmt_info = NULL;
2420 if (modifier == NONE || ifn != IFN_LAST)
2422 tree prev_res = NULL_TREE;
2423 for (j = 0; j < ncopies; ++j)
2425 /* Build argument list for the vectorized call. */
2426 if (j == 0)
2427 vargs.create (nargs);
2428 else
2429 vargs.truncate (0);
2431 if (slp_node)
2433 auto_vec<vec<tree> > vec_defs (nargs);
2434 vec<tree> vec_oprnds0;
2436 for (i = 0; i < nargs; i++)
2437 vargs.quick_push (gimple_call_arg (stmt, i));
2438 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2439 vec_oprnds0 = vec_defs[0];
2441 /* Arguments are ready. Create the new vector stmt. */
2442 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2444 size_t k;
2445 for (k = 0; k < nargs; k++)
2447 vec<tree> vec_oprndsk = vec_defs[k];
2448 vargs[k] = vec_oprndsk[i];
2450 if (modifier == NARROW)
2452 tree half_res = make_ssa_name (vectype_in);
2453 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2454 gimple_call_set_lhs (new_stmt, half_res);
2455 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2456 if ((i & 1) == 0)
2458 prev_res = half_res;
2459 continue;
2461 new_temp = make_ssa_name (vec_dest);
2462 new_stmt = gimple_build_assign (new_temp, convert_code,
2463 prev_res, half_res);
2465 else
2467 if (ifn != IFN_LAST)
2468 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2469 else
2470 new_stmt = gimple_build_call_vec (fndecl, vargs);
2471 new_temp = make_ssa_name (vec_dest, new_stmt);
2472 gimple_call_set_lhs (new_stmt, new_temp);
2474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2475 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2478 for (i = 0; i < nargs; i++)
2480 vec<tree> vec_oprndsi = vec_defs[i];
2481 vec_oprndsi.release ();
2483 continue;
2486 for (i = 0; i < nargs; i++)
2488 op = gimple_call_arg (stmt, i);
2489 if (j == 0)
2490 vec_oprnd0
2491 = vect_get_vec_def_for_operand (op, stmt);
2492 else
2494 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2495 vec_oprnd0
2496 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2499 vargs.quick_push (vec_oprnd0);
2502 if (gimple_call_internal_p (stmt)
2503 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2505 tree *v = XALLOCAVEC (tree, nunits_out);
2506 int k;
2507 for (k = 0; k < nunits_out; ++k)
2508 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2509 tree cst = build_vector (vectype_out, v);
2510 tree new_var
2511 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2512 gimple *init_stmt = gimple_build_assign (new_var, cst);
2513 vect_init_vector_1 (stmt, init_stmt, NULL);
2514 new_temp = make_ssa_name (vec_dest);
2515 new_stmt = gimple_build_assign (new_temp, new_var);
2517 else if (modifier == NARROW)
2519 tree half_res = make_ssa_name (vectype_in);
2520 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2521 gimple_call_set_lhs (new_stmt, half_res);
2522 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2523 if ((j & 1) == 0)
2525 prev_res = half_res;
2526 continue;
2528 new_temp = make_ssa_name (vec_dest);
2529 new_stmt = gimple_build_assign (new_temp, convert_code,
2530 prev_res, half_res);
2532 else
2534 if (ifn != IFN_LAST)
2535 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2536 else
2537 new_stmt = gimple_build_call_vec (fndecl, vargs);
2538 new_temp = make_ssa_name (vec_dest, new_stmt);
2539 gimple_call_set_lhs (new_stmt, new_temp);
2541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2543 if (j == (modifier == NARROW ? 1 : 0))
2544 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2545 else
2546 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2548 prev_stmt_info = vinfo_for_stmt (new_stmt);
2551 else if (modifier == NARROW)
2553 for (j = 0; j < ncopies; ++j)
2555 /* Build argument list for the vectorized call. */
2556 if (j == 0)
2557 vargs.create (nargs * 2);
2558 else
2559 vargs.truncate (0);
2561 if (slp_node)
2563 auto_vec<vec<tree> > vec_defs (nargs);
2564 vec<tree> vec_oprnds0;
2566 for (i = 0; i < nargs; i++)
2567 vargs.quick_push (gimple_call_arg (stmt, i));
2568 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2569 vec_oprnds0 = vec_defs[0];
2571 /* Arguments are ready. Create the new vector stmt. */
2572 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2574 size_t k;
2575 vargs.truncate (0);
2576 for (k = 0; k < nargs; k++)
2578 vec<tree> vec_oprndsk = vec_defs[k];
2579 vargs.quick_push (vec_oprndsk[i]);
2580 vargs.quick_push (vec_oprndsk[i + 1]);
2582 if (ifn != IFN_LAST)
2583 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2584 else
2585 new_stmt = gimple_build_call_vec (fndecl, vargs);
2586 new_temp = make_ssa_name (vec_dest, new_stmt);
2587 gimple_call_set_lhs (new_stmt, new_temp);
2588 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2589 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2592 for (i = 0; i < nargs; i++)
2594 vec<tree> vec_oprndsi = vec_defs[i];
2595 vec_oprndsi.release ();
2597 continue;
2600 for (i = 0; i < nargs; i++)
2602 op = gimple_call_arg (stmt, i);
2603 if (j == 0)
2605 vec_oprnd0
2606 = vect_get_vec_def_for_operand (op, stmt);
2607 vec_oprnd1
2608 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2610 else
2612 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2613 vec_oprnd0
2614 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2615 vec_oprnd1
2616 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2619 vargs.quick_push (vec_oprnd0);
2620 vargs.quick_push (vec_oprnd1);
2623 new_stmt = gimple_build_call_vec (fndecl, vargs);
2624 new_temp = make_ssa_name (vec_dest, new_stmt);
2625 gimple_call_set_lhs (new_stmt, new_temp);
2626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2628 if (j == 0)
2629 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2630 else
2631 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2633 prev_stmt_info = vinfo_for_stmt (new_stmt);
2636 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2638 else
2639 /* No current target implements this case. */
2640 return false;
2642 vargs.release ();
2644 /* The call in STMT might prevent it from being removed in dce.
2645 However, we cannot remove it here, due to the way the ssa name
2646 it defines is mapped to the new definition. So just replace
2647 the rhs of the statement with something harmless. */
2649 if (slp_node)
2650 return true;
2652 type = TREE_TYPE (scalar_dest);
2653 if (is_pattern_stmt_p (stmt_info))
2654 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2655 else
2656 lhs = gimple_call_lhs (stmt);
2658 if (gimple_call_internal_p (stmt)
2659 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2661 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2662 with vf - 1 rather than 0, i.e. the last iteration of the
2663 vectorized loop. */
2664 imm_use_iterator iter;
2665 use_operand_p use_p;
2666 gimple *use_stmt;
2667 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2669 basic_block use_bb = gimple_bb (use_stmt);
2670 if (use_bb
2671 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2673 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2674 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2675 ncopies * nunits_out - 1));
2676 update_stmt (use_stmt);
2681 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2682 set_vinfo_for_stmt (new_stmt, stmt_info);
2683 set_vinfo_for_stmt (stmt, NULL);
2684 STMT_VINFO_STMT (stmt_info) = new_stmt;
2685 gsi_replace (gsi, new_stmt, false);
2687 return true;
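/* Illustrative example of a call that vectorizable_call above can handle:
   a loop over a math builtin for which the target provides a vector form
   (here sqrt, via the SQRT internal function or a target builtin).  The
   function name is made up; -fno-math-errno is assumed so that the call
   does not read or clobber memory.  */

void
example_vectorizable_call (double *__restrict out, const double *__restrict a,
                           int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = __builtin_sqrt (a[i]);     /* one vector sqrt per VF lanes */
}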
2691 struct simd_call_arg_info
2693 tree vectype;
2694 tree op;
2695 enum vect_def_type dt;
2696 HOST_WIDE_INT linear_step;
2697 unsigned int align;
2698 bool simd_lane_linear;
2701 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2702 is linear within simd lane (but not within whole loop), note it in
2703 *ARGINFO. */
2705 static void
2706 vect_simd_lane_linear (tree op, struct loop *loop,
2707 struct simd_call_arg_info *arginfo)
2709 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2711 if (!is_gimple_assign (def_stmt)
2712 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2713 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2714 return;
2716 tree base = gimple_assign_rhs1 (def_stmt);
2717 HOST_WIDE_INT linear_step = 0;
2718 tree v = gimple_assign_rhs2 (def_stmt);
2719 while (TREE_CODE (v) == SSA_NAME)
2721 tree t;
2722 def_stmt = SSA_NAME_DEF_STMT (v);
2723 if (is_gimple_assign (def_stmt))
2724 switch (gimple_assign_rhs_code (def_stmt))
2726 case PLUS_EXPR:
2727 t = gimple_assign_rhs2 (def_stmt);
2728 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2729 return;
2730 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2731 v = gimple_assign_rhs1 (def_stmt);
2732 continue;
2733 case MULT_EXPR:
2734 t = gimple_assign_rhs2 (def_stmt);
2735 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2736 return;
2737 linear_step = tree_to_shwi (t);
2738 v = gimple_assign_rhs1 (def_stmt);
2739 continue;
2740 CASE_CONVERT:
2741 t = gimple_assign_rhs1 (def_stmt);
2742 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2743 || (TYPE_PRECISION (TREE_TYPE (v))
2744 < TYPE_PRECISION (TREE_TYPE (t))))
2745 return;
2746 if (!linear_step)
2747 linear_step = 1;
2748 v = t;
2749 continue;
2750 default:
2751 return;
2753 else if (is_gimple_call (def_stmt)
2754 && gimple_call_internal_p (def_stmt)
2755 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2756 && loop->simduid
2757 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2758 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2759 == loop->simduid))
2761 if (!linear_step)
2762 linear_step = 1;
2763 arginfo->linear_step = linear_step;
2764 arginfo->op = base;
2765 arginfo->simd_lane_linear = true;
2766 return;
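/* Illustrative sketch of one way simd-lane-linear addresses can arise
   (an assumption for the example, not the only source): a lastprivate
   scalar in an OpenMP simd loop is privatized into a per-lane array, so
   its address has the form BASE + GOMP_SIMD_LANE * STEP, which the helper
   above recognizes.  Compiling with -fopenmp-simd is assumed.  */

void
example_simd_lane_linear (float *__restrict out, const float *__restrict in,
                          int n)
{
  float t = 0.0f;
  int i;
#pragma omp simd lastprivate (t)
  for (i = 0; i < n; i++)
    {
      t = in[i] * 2.0f;         /* T lives in a per-lane simd array */
      out[i] = t;
    }
}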
2771 /* Function vectorizable_simd_clone_call.
2773 Check if STMT performs a function call that can be vectorized
2774 by calling a simd clone of the function.
2775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2779 static bool
2780 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2781 gimple **vec_stmt, slp_tree slp_node)
2783 tree vec_dest;
2784 tree scalar_dest;
2785 tree op, type;
2786 tree vec_oprnd0 = NULL_TREE;
2787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2788 tree vectype;
2789 unsigned int nunits;
2790 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2791 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2792 vec_info *vinfo = stmt_info->vinfo;
2793 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2794 tree fndecl, new_temp;
2795 gimple *def_stmt;
2796 gimple *new_stmt = NULL;
2797 int ncopies, j;
2798 vec<simd_call_arg_info> arginfo = vNULL;
2799 vec<tree> vargs = vNULL;
2800 size_t i, nargs;
2801 tree lhs, rtype, ratype;
2802 vec<constructor_elt, va_gc> *ret_ctor_elts;
2804 /* Is STMT a vectorizable call? */
2805 if (!is_gimple_call (stmt))
2806 return false;
2808 fndecl = gimple_call_fndecl (stmt);
2809 if (fndecl == NULL_TREE)
2810 return false;
2812 struct cgraph_node *node = cgraph_node::get (fndecl);
2813 if (node == NULL || node->simd_clones == NULL)
2814 return false;
2816 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2817 return false;
2819 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2820 && ! vec_stmt)
2821 return false;
2823 if (gimple_call_lhs (stmt)
2824 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2825 return false;
2827 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2829 vectype = STMT_VINFO_VECTYPE (stmt_info);
2831 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2832 return false;
2834 /* FORNOW */
2835 if (slp_node || PURE_SLP_STMT (stmt_info))
2836 return false;
2838 /* Process function arguments. */
2839 nargs = gimple_call_num_args (stmt);
2841 /* Bail out if the function has zero arguments. */
2842 if (nargs == 0)
2843 return false;
2845 arginfo.create (nargs);
2847 for (i = 0; i < nargs; i++)
2849 simd_call_arg_info thisarginfo;
2850 affine_iv iv;
2852 thisarginfo.linear_step = 0;
2853 thisarginfo.align = 0;
2854 thisarginfo.op = NULL_TREE;
2855 thisarginfo.simd_lane_linear = false;
2857 op = gimple_call_arg (stmt, i);
2858 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2859 &thisarginfo.vectype)
2860 || thisarginfo.dt == vect_uninitialized_def)
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2864 "use not simple.\n");
2865 arginfo.release ();
2866 return false;
2869 if (thisarginfo.dt == vect_constant_def
2870 || thisarginfo.dt == vect_external_def)
2871 gcc_assert (thisarginfo.vectype == NULL_TREE);
2872 else
2873 gcc_assert (thisarginfo.vectype != NULL_TREE);
2875 /* For linear arguments, the analysis phase should have saved
2876 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2877 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2878 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2880 gcc_assert (vec_stmt);
2881 thisarginfo.linear_step
2882 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2883 thisarginfo.op
2884 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2885 thisarginfo.simd_lane_linear
2886 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2887 == boolean_true_node);
2888 /* If the loop has been peeled for alignment, we need to adjust it. */
2889 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2890 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2891 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2893 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2894 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2895 tree opt = TREE_TYPE (thisarginfo.op);
2896 bias = fold_convert (TREE_TYPE (step), bias);
2897 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2898 thisarginfo.op
2899 = fold_build2 (POINTER_TYPE_P (opt)
2900 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2901 thisarginfo.op, bias);
2904 else if (!vec_stmt
2905 && thisarginfo.dt != vect_constant_def
2906 && thisarginfo.dt != vect_external_def
2907 && loop_vinfo
2908 && TREE_CODE (op) == SSA_NAME
2909 && simple_iv (loop, loop_containing_stmt (stmt), op,
2910 &iv, false)
2911 && tree_fits_shwi_p (iv.step))
2913 thisarginfo.linear_step = tree_to_shwi (iv.step);
2914 thisarginfo.op = iv.base;
2916 else if ((thisarginfo.dt == vect_constant_def
2917 || thisarginfo.dt == vect_external_def)
2918 && POINTER_TYPE_P (TREE_TYPE (op)))
2919 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2920 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2921 linear too. */
2922 if (POINTER_TYPE_P (TREE_TYPE (op))
2923 && !thisarginfo.linear_step
2924 && !vec_stmt
2925 && thisarginfo.dt != vect_constant_def
2926 && thisarginfo.dt != vect_external_def
2927 && loop_vinfo
2928 && !slp_node
2929 && TREE_CODE (op) == SSA_NAME)
2930 vect_simd_lane_linear (op, loop, &thisarginfo);
2932 arginfo.quick_push (thisarginfo);
2935 unsigned int badness = 0;
2936 struct cgraph_node *bestn = NULL;
2937 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2938 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2939 else
2940 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2941 n = n->simdclone->next_clone)
2943 unsigned int this_badness = 0;
2944 if (n->simdclone->simdlen
2945 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2946 || n->simdclone->nargs != nargs)
2947 continue;
2948 if (n->simdclone->simdlen
2949 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2950 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2951 - exact_log2 (n->simdclone->simdlen)) * 1024;
2952 if (n->simdclone->inbranch)
2953 this_badness += 2048;
2954 int target_badness = targetm.simd_clone.usable (n);
2955 if (target_badness < 0)
2956 continue;
2957 this_badness += target_badness * 512;
2958 /* FORNOW: Have to add code to add the mask argument. */
2959 if (n->simdclone->inbranch)
2960 continue;
2961 for (i = 0; i < nargs; i++)
2963 switch (n->simdclone->args[i].arg_type)
2965 case SIMD_CLONE_ARG_TYPE_VECTOR:
2966 if (!useless_type_conversion_p
2967 (n->simdclone->args[i].orig_type,
2968 TREE_TYPE (gimple_call_arg (stmt, i))))
2969 i = -1;
2970 else if (arginfo[i].dt == vect_constant_def
2971 || arginfo[i].dt == vect_external_def
2972 || arginfo[i].linear_step)
2973 this_badness += 64;
2974 break;
2975 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2976 if (arginfo[i].dt != vect_constant_def
2977 && arginfo[i].dt != vect_external_def)
2978 i = -1;
2979 break;
2980 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2981 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2982 if (arginfo[i].dt == vect_constant_def
2983 || arginfo[i].dt == vect_external_def
2984 || (arginfo[i].linear_step
2985 != n->simdclone->args[i].linear_step))
2986 i = -1;
2987 break;
2988 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2989 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2990 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2991 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2992 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2993 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2994 /* FORNOW */
2995 i = -1;
2996 break;
2997 case SIMD_CLONE_ARG_TYPE_MASK:
2998 gcc_unreachable ();
3000 if (i == (size_t) -1)
3001 break;
3002 if (n->simdclone->args[i].alignment > arginfo[i].align)
3004 i = -1;
3005 break;
3007 if (arginfo[i].align)
3008 this_badness += (exact_log2 (arginfo[i].align)
3009 - exact_log2 (n->simdclone->args[i].alignment));
3011 if (i == (size_t) -1)
3012 continue;
3013 if (bestn == NULL || this_badness < badness)
3015 bestn = n;
3016 badness = this_badness;
3020 if (bestn == NULL)
3022 arginfo.release ();
3023 return false;
3026 for (i = 0; i < nargs; i++)
3027 if ((arginfo[i].dt == vect_constant_def
3028 || arginfo[i].dt == vect_external_def)
3029 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3031 arginfo[i].vectype
3032 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3033 i)));
3034 if (arginfo[i].vectype == NULL
3035 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3036 > bestn->simdclone->simdlen))
3038 arginfo.release ();
3039 return false;
3043 fndecl = bestn->decl;
3044 nunits = bestn->simdclone->simdlen;
3045 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3047 /* If the function isn't const, only allow it in simd loops where the
3048 user has asserted that at least nunits consecutive iterations can be
3049 performed using SIMD instructions. */
3050 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3051 && gimple_vuse (stmt))
3053 arginfo.release ();
3054 return false;
3057 /* Sanity check: make sure that at least one copy of the vectorized stmt
3058 needs to be generated. */
3059 gcc_assert (ncopies >= 1);
3061 if (!vec_stmt) /* transformation not required. */
3063 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3064 for (i = 0; i < nargs; i++)
3065 if (bestn->simdclone->args[i].arg_type
3066 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3068 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3069 + 1);
3070 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3071 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3072 ? size_type_node : TREE_TYPE (arginfo[i].op);
3073 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3074 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3075 tree sll = arginfo[i].simd_lane_linear
3076 ? boolean_true_node : boolean_false_node;
3077 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3079 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3080 if (dump_enabled_p ())
3081 dump_printf_loc (MSG_NOTE, vect_location,
3082 "=== vectorizable_simd_clone_call ===\n");
3083 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3084 arginfo.release ();
3085 return true;
3088 /** Transform. **/
3090 if (dump_enabled_p ())
3091 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3093 /* Handle def. */
3094 scalar_dest = gimple_call_lhs (stmt);
3095 vec_dest = NULL_TREE;
3096 rtype = NULL_TREE;
3097 ratype = NULL_TREE;
3098 if (scalar_dest)
3100 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3101 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3102 if (TREE_CODE (rtype) == ARRAY_TYPE)
3104 ratype = rtype;
3105 rtype = TREE_TYPE (ratype);
3109 prev_stmt_info = NULL;
3110 for (j = 0; j < ncopies; ++j)
3112 /* Build argument list for the vectorized call. */
3113 if (j == 0)
3114 vargs.create (nargs);
3115 else
3116 vargs.truncate (0);
3118 for (i = 0; i < nargs; i++)
3120 unsigned int k, l, m, o;
3121 tree atype;
3122 op = gimple_call_arg (stmt, i);
3123 switch (bestn->simdclone->args[i].arg_type)
3125 case SIMD_CLONE_ARG_TYPE_VECTOR:
3126 atype = bestn->simdclone->args[i].vector_type;
3127 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3128 for (m = j * o; m < (j + 1) * o; m++)
3130 if (TYPE_VECTOR_SUBPARTS (atype)
3131 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3133 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3134 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3135 / TYPE_VECTOR_SUBPARTS (atype));
3136 gcc_assert ((k & (k - 1)) == 0);
3137 if (m == 0)
3138 vec_oprnd0
3139 = vect_get_vec_def_for_operand (op, stmt);
3140 else
3142 vec_oprnd0 = arginfo[i].op;
3143 if ((m & (k - 1)) == 0)
3144 vec_oprnd0
3145 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3146 vec_oprnd0);
3148 arginfo[i].op = vec_oprnd0;
3149 vec_oprnd0
3150 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3151 size_int (prec),
3152 bitsize_int ((m & (k - 1)) * prec));
3153 new_stmt
3154 = gimple_build_assign (make_ssa_name (atype),
3155 vec_oprnd0);
3156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3157 vargs.safe_push (gimple_assign_lhs (new_stmt));
3159 else
3161 k = (TYPE_VECTOR_SUBPARTS (atype)
3162 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3163 gcc_assert ((k & (k - 1)) == 0);
3164 vec<constructor_elt, va_gc> *ctor_elts;
3165 if (k != 1)
3166 vec_alloc (ctor_elts, k);
3167 else
3168 ctor_elts = NULL;
3169 for (l = 0; l < k; l++)
3171 if (m == 0 && l == 0)
3172 vec_oprnd0
3173 = vect_get_vec_def_for_operand (op, stmt);
3174 else
3175 vec_oprnd0
3176 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3177 arginfo[i].op);
3178 arginfo[i].op = vec_oprnd0;
3179 if (k == 1)
3180 break;
3181 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3182 vec_oprnd0);
3184 if (k == 1)
3185 vargs.safe_push (vec_oprnd0);
3186 else
3188 vec_oprnd0 = build_constructor (atype, ctor_elts);
3189 new_stmt
3190 = gimple_build_assign (make_ssa_name (atype),
3191 vec_oprnd0);
3192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3193 vargs.safe_push (gimple_assign_lhs (new_stmt));
3197 break;
3198 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3199 vargs.safe_push (op);
3200 break;
3201 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3202 if (j == 0)
3204 gimple_seq stmts;
3205 arginfo[i].op
3206 = force_gimple_operand (arginfo[i].op, &stmts, true,
3207 NULL_TREE);
3208 if (stmts != NULL)
3210 basic_block new_bb;
3211 edge pe = loop_preheader_edge (loop);
3212 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3213 gcc_assert (!new_bb);
3215 if (arginfo[i].simd_lane_linear)
3217 vargs.safe_push (arginfo[i].op);
3218 break;
3220 tree phi_res = copy_ssa_name (op);
3221 gphi *new_phi = create_phi_node (phi_res, loop->header);
3222 set_vinfo_for_stmt (new_phi,
3223 new_stmt_vec_info (new_phi, loop_vinfo));
3224 add_phi_arg (new_phi, arginfo[i].op,
3225 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3226 enum tree_code code
3227 = POINTER_TYPE_P (TREE_TYPE (op))
3228 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3229 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3230 ? sizetype : TREE_TYPE (op);
3231 widest_int cst
3232 = wi::mul (bestn->simdclone->args[i].linear_step,
3233 ncopies * nunits);
3234 tree tcst = wide_int_to_tree (type, cst);
3235 tree phi_arg = copy_ssa_name (op);
3236 new_stmt
3237 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3238 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3239 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3240 set_vinfo_for_stmt (new_stmt,
3241 new_stmt_vec_info (new_stmt, loop_vinfo));
3242 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3243 UNKNOWN_LOCATION);
3244 arginfo[i].op = phi_res;
3245 vargs.safe_push (phi_res);
3247 else
3249 enum tree_code code
3250 = POINTER_TYPE_P (TREE_TYPE (op))
3251 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3252 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3253 ? sizetype : TREE_TYPE (op);
3254 widest_int cst
3255 = wi::mul (bestn->simdclone->args[i].linear_step,
3256 j * nunits);
3257 tree tcst = wide_int_to_tree (type, cst);
3258 new_temp = make_ssa_name (TREE_TYPE (op));
3259 new_stmt = gimple_build_assign (new_temp, code,
3260 arginfo[i].op, tcst);
3261 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3262 vargs.safe_push (new_temp);
3264 break;
3265 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3266 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3267 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3268 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3269 default:
3270 gcc_unreachable ();
3274 new_stmt = gimple_build_call_vec (fndecl, vargs);
3275 if (vec_dest)
3277 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3278 if (ratype)
3279 new_temp = create_tmp_var (ratype);
3280 else if (TYPE_VECTOR_SUBPARTS (vectype)
3281 == TYPE_VECTOR_SUBPARTS (rtype))
3282 new_temp = make_ssa_name (vec_dest, new_stmt);
3283 else
3284 new_temp = make_ssa_name (rtype, new_stmt);
3285 gimple_call_set_lhs (new_stmt, new_temp);
3287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3289 if (vec_dest)
3291 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3293 unsigned int k, l;
3294 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3295 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3296 gcc_assert ((k & (k - 1)) == 0);
3297 for (l = 0; l < k; l++)
3299 tree t;
3300 if (ratype)
3302 t = build_fold_addr_expr (new_temp);
3303 t = build2 (MEM_REF, vectype, t,
3304 build_int_cst (TREE_TYPE (t),
3305 l * prec / BITS_PER_UNIT));
3307 else
3308 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3309 size_int (prec), bitsize_int (l * prec));
3310 new_stmt
3311 = gimple_build_assign (make_ssa_name (vectype), t);
3312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3313 if (j == 0 && l == 0)
3314 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3315 else
3316 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3318 prev_stmt_info = vinfo_for_stmt (new_stmt);
3321 if (ratype)
3323 tree clobber = build_constructor (ratype, NULL);
3324 TREE_THIS_VOLATILE (clobber) = 1;
3325 new_stmt = gimple_build_assign (new_temp, clobber);
3326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3328 continue;
3330 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3332 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3333 / TYPE_VECTOR_SUBPARTS (rtype));
3334 gcc_assert ((k & (k - 1)) == 0);
3335 if ((j & (k - 1)) == 0)
3336 vec_alloc (ret_ctor_elts, k);
3337 if (ratype)
3339 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3340 for (m = 0; m < o; m++)
3342 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3343 size_int (m), NULL_TREE, NULL_TREE);
3344 new_stmt
3345 = gimple_build_assign (make_ssa_name (rtype), tem);
3346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3347 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3348 gimple_assign_lhs (new_stmt));
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 new_stmt = gimple_build_assign (new_temp, clobber);
3353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3355 else
3356 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3357 if ((j & (k - 1)) != k - 1)
3358 continue;
3359 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3360 new_stmt
3361 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3364 if ((unsigned) j == k - 1)
3365 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3366 else
3367 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3369 prev_stmt_info = vinfo_for_stmt (new_stmt);
3370 continue;
3372 else if (ratype)
3374 tree t = build_fold_addr_expr (new_temp);
3375 t = build2 (MEM_REF, vectype, t,
3376 build_int_cst (TREE_TYPE (t), 0));
3377 new_stmt
3378 = gimple_build_assign (make_ssa_name (vec_dest), t);
3379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3380 tree clobber = build_constructor (ratype, NULL);
3381 TREE_THIS_VOLATILE (clobber) = 1;
3382 vect_finish_stmt_generation (stmt,
3383 gimple_build_assign (new_temp,
3384 clobber), gsi);
3388 if (j == 0)
3389 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3390 else
3391 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3393 prev_stmt_info = vinfo_for_stmt (new_stmt);
3396 vargs.release ();
3398 /* The call in STMT might prevent it from being removed in dce.
3399 However, we cannot remove it here, due to the way the ssa name
3400 it defines is mapped to the new definition. So just replace
3401 the rhs of the statement with something harmless. */
3403 if (slp_node)
3404 return true;
3406 if (scalar_dest)
3408 type = TREE_TYPE (scalar_dest);
3409 if (is_pattern_stmt_p (stmt_info))
3410 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3411 else
3412 lhs = gimple_call_lhs (stmt);
3413 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3415 else
3416 new_stmt = gimple_build_nop ();
3417 set_vinfo_for_stmt (new_stmt, stmt_info);
3418 set_vinfo_for_stmt (stmt, NULL);
3419 STMT_VINFO_STMT (stmt_info) = new_stmt;
3420 gsi_replace (gsi, new_stmt, true);
3421 unlink_stmt_vdef (stmt);
3423 return true;
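/* Illustrative example of a call vectorized through a simd clone by the
   function above: "declare simd" makes the compiler emit vector clones of
   do_work, and the loop then calls one clone per VF lanes.  The function
   do_work and its clauses are made up for the example; -fopenmp-simd is
   assumed.  */

#pragma omp declare simd uniform (scale) notinbranch
extern double do_work (double x, int scale);

void
example_simd_clone_call (double *__restrict out, const double *__restrict in,
                         int n)
{
  int i;
#pragma omp simd
  for (i = 0; i < n; i++)
    out[i] = do_work (in[i], 4);        /* 4 matches the uniform argument */
}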
3427 /* Function vect_gen_widened_results_half
3429 Create a vector stmt whose code, number of arguments, and result
3430 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3431 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3432 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3433 needs to be created (DECL is a function-decl of a target-builtin).
3434 STMT is the original scalar stmt that we are vectorizing. */
3436 static gimple *
3437 vect_gen_widened_results_half (enum tree_code code,
3438 tree decl,
3439 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3440 tree vec_dest, gimple_stmt_iterator *gsi,
3441 gimple *stmt)
3443 gimple *new_stmt;
3444 tree new_temp;
3446 /* Generate half of the widened result: */
3447 if (code == CALL_EXPR)
3449 /* Target specific support */
3450 if (op_type == binary_op)
3451 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3452 else
3453 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3454 new_temp = make_ssa_name (vec_dest, new_stmt);
3455 gimple_call_set_lhs (new_stmt, new_temp);
3457 else
3459 /* Generic support */
3460 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3461 if (op_type != binary_op)
3462 vec_oprnd1 = NULL;
3463 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3464 new_temp = make_ssa_name (vec_dest, new_stmt);
3465 gimple_assign_set_lhs (new_stmt, new_temp);
3467 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3469 return new_stmt;
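/* Conceptual sketch of the two halves produced by the helper above: a
   widening operation on a narrow input vector yields two full-width result
   vectors, one from the low elements and one from the high elements
   (VEC_UNPACK_LO / VEC_UNPACK_HI style).  Plain C illustration; SRC holds
   2*N elements, and which elements form the "low" half is
   target-dependent.  */

static void
example_widen_halves (int *__restrict lo, int *__restrict hi,
                      const short *__restrict src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    lo[i] = (int) src[i];       /* first half of the narrow vector, widened */
  for (i = 0; i < n; i++)
    hi[i] = (int) src[n + i];   /* second half, widened */
}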
3473 /* Get vectorized definitions for loop-based vectorization. For the first
3474 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3475 the scalar operand), and for the rest we get a copy with
3476 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3477 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3478 The vectors are collected into VEC_OPRNDS. */
3480 static void
3481 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3482 vec<tree> *vec_oprnds, int multi_step_cvt)
3484 tree vec_oprnd;
3486 /* Get the first vector operand. */
3487 /* All the vector operands except the very first one (which is the scalar
3488 oprnd) are stmt copies. */
3489 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3490 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3491 else
3492 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3494 vec_oprnds->quick_push (vec_oprnd);
3496 /* Get second vector operand. */
3497 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3498 vec_oprnds->quick_push (vec_oprnd);
3500 *oprnd = vec_oprnd;
3502 /* For conversion in multiple steps, continue to get operands
3503 recursively. */
3504 if (multi_step_cvt)
3505 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3509 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3510 For multi-step conversions store the resulting vectors and call the function
3511 recursively. */
3513 static void
3514 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3515 int multi_step_cvt, gimple *stmt,
3516 vec<tree> vec_dsts,
3517 gimple_stmt_iterator *gsi,
3518 slp_tree slp_node, enum tree_code code,
3519 stmt_vec_info *prev_stmt_info)
3521 unsigned int i;
3522 tree vop0, vop1, new_tmp, vec_dest;
3523 gimple *new_stmt;
3524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3526 vec_dest = vec_dsts.pop ();
3528 for (i = 0; i < vec_oprnds->length (); i += 2)
3530 /* Create demotion operation. */
3531 vop0 = (*vec_oprnds)[i];
3532 vop1 = (*vec_oprnds)[i + 1];
3533 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3534 new_tmp = make_ssa_name (vec_dest, new_stmt);
3535 gimple_assign_set_lhs (new_stmt, new_tmp);
3536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3538 if (multi_step_cvt)
3539 /* Store the resulting vector for next recursive call. */
3540 (*vec_oprnds)[i/2] = new_tmp;
3541 else
3543 /* This is the last step of the conversion sequence. Store the
3544 vectors in SLP_NODE or in vector info of the scalar statement
3545 (or in STMT_VINFO_RELATED_STMT chain). */
3546 if (slp_node)
3547 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3548 else
3550 if (!*prev_stmt_info)
3551 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3552 else
3553 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3555 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3560 /* For multi-step demotion operations we first generate demotion operations
3561 from the source type to the intermediate types, and then combine the
3562 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3563 type. */
3564 if (multi_step_cvt)
3566 /* At each level of recursion we have half of the operands we had at the
3567 previous level. */
3568 vec_oprnds->truncate ((i+1)/2);
3569 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3570 stmt, vec_dsts, gsi, slp_node,
3571 VEC_PACK_TRUNC_EXPR,
3572 prev_stmt_info);
3575 vec_dsts.quick_push (vec_dest);
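/* Illustrative example of a multi-step demotion driven by the recursion
   above: converting int elements to signed char goes through an intermediate
   short step, halving the number of vectors at each level.  Sketch only;
   the function name is made up.  */

void
example_multi_step_demotion (signed char *__restrict out,
                             const int *__restrict in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = (signed char) in[i];       /* int -> short -> char when packing */
}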
3579 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3580 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3581 the resulting vectors and call the function recursively. */
3583 static void
3584 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3585 vec<tree> *vec_oprnds1,
3586 gimple *stmt, tree vec_dest,
3587 gimple_stmt_iterator *gsi,
3588 enum tree_code code1,
3589 enum tree_code code2, tree decl1,
3590 tree decl2, int op_type)
3592 int i;
3593 tree vop0, vop1, new_tmp1, new_tmp2;
3594 gimple *new_stmt1, *new_stmt2;
3595 vec<tree> vec_tmp = vNULL;
3597 vec_tmp.create (vec_oprnds0->length () * 2);
3598 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3600 if (op_type == binary_op)
3601 vop1 = (*vec_oprnds1)[i];
3602 else
3603 vop1 = NULL_TREE;
3605 /* Generate the two halves of promotion operation. */
3606 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3607 op_type, vec_dest, gsi, stmt);
3608 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3609 op_type, vec_dest, gsi, stmt);
3610 if (is_gimple_call (new_stmt1))
3612 new_tmp1 = gimple_call_lhs (new_stmt1);
3613 new_tmp2 = gimple_call_lhs (new_stmt2);
3615 else
3617 new_tmp1 = gimple_assign_lhs (new_stmt1);
3618 new_tmp2 = gimple_assign_lhs (new_stmt2);
3621 /* Store the results for the next step. */
3622 vec_tmp.quick_push (new_tmp1);
3623 vec_tmp.quick_push (new_tmp2);
3626 vec_oprnds0->release ();
3627 *vec_oprnds0 = vec_tmp;
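/* Illustrative example of a widening operation vectorized with the promotion
   helper above: a short * short multiply whose result is stored as int is
   recognized as WIDEN_MULT_EXPR and expanded as LO/HI halves.  Sketch only;
   the function name is made up.  */

void
example_widen_mult (int *__restrict out, const short *__restrict a,
                    const short *__restrict b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = a[i] * b[i];       /* operands widened to int, WIDEN_MULT */
}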
3631 /* Check if STMT performs a conversion operation that can be vectorized.
3632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3633 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3636 static bool
3637 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3638 gimple **vec_stmt, slp_tree slp_node)
3640 tree vec_dest;
3641 tree scalar_dest;
3642 tree op0, op1 = NULL_TREE;
3643 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3644 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3646 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3647 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3648 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3649 tree new_temp;
3650 gimple *def_stmt;
3651 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3652 gimple *new_stmt = NULL;
3653 stmt_vec_info prev_stmt_info;
3654 int nunits_in;
3655 int nunits_out;
3656 tree vectype_out, vectype_in;
3657 int ncopies, i, j;
3658 tree lhs_type, rhs_type;
3659 enum { NARROW, NONE, WIDEN } modifier;
3660 vec<tree> vec_oprnds0 = vNULL;
3661 vec<tree> vec_oprnds1 = vNULL;
3662 tree vop0;
3663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3664 vec_info *vinfo = stmt_info->vinfo;
3665 int multi_step_cvt = 0;
3666 vec<tree> vec_dsts = vNULL;
3667 vec<tree> interm_types = vNULL;
3668 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3669 int op_type;
3670 machine_mode rhs_mode;
3671 unsigned short fltsz;
3673 /* Is STMT a vectorizable conversion? */
3675 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3676 return false;
3678 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3679 && ! vec_stmt)
3680 return false;
3682 if (!is_gimple_assign (stmt))
3683 return false;
3685 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3686 return false;
3688 code = gimple_assign_rhs_code (stmt);
3689 if (!CONVERT_EXPR_CODE_P (code)
3690 && code != FIX_TRUNC_EXPR
3691 && code != FLOAT_EXPR
3692 && code != WIDEN_MULT_EXPR
3693 && code != WIDEN_LSHIFT_EXPR)
3694 return false;
3696 op_type = TREE_CODE_LENGTH (code);
3698 /* Check types of lhs and rhs. */
3699 scalar_dest = gimple_assign_lhs (stmt);
3700 lhs_type = TREE_TYPE (scalar_dest);
3701 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3703 op0 = gimple_assign_rhs1 (stmt);
3704 rhs_type = TREE_TYPE (op0);
3706 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3707 && !((INTEGRAL_TYPE_P (lhs_type)
3708 && INTEGRAL_TYPE_P (rhs_type))
3709 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3710 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3711 return false;
3713 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3714 && ((INTEGRAL_TYPE_P (lhs_type)
3715 && (TYPE_PRECISION (lhs_type)
3716 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3717 || (INTEGRAL_TYPE_P (rhs_type)
3718 && (TYPE_PRECISION (rhs_type)
3719 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3723 "type conversion to/from bit-precision unsupported."
3724 "\n");
3725 return false;
3728 /* Check the operands of the operation. */
3729 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3731 if (dump_enabled_p ())
3732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3733 "use not simple.\n");
3734 return false;
3736 if (op_type == binary_op)
3738 bool ok;
3740 op1 = gimple_assign_rhs2 (stmt);
3741 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3742 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3743 OP1. */
3744 if (CONSTANT_CLASS_P (op0))
3745 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3746 else
3747 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3749 if (!ok)
3751 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "use not simple.\n");
3754 return false;
3758 /* If op0 is an external or constant def, use a vector type of
3759 the same size as the output vector type. */
3760 if (!vectype_in)
3761 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3762 if (vec_stmt)
3763 gcc_assert (vectype_in);
3764 if (!vectype_in)
3766 if (dump_enabled_p ())
3768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3769 "no vectype for scalar type ");
3770 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3771 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3774 return false;
3777 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3778 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3780 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3783 "can't convert between boolean and non "
3784 "boolean vectors");
3785 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3786 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3789 return false;
3792 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3793 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3794 if (nunits_in < nunits_out)
3795 modifier = NARROW;
3796 else if (nunits_out == nunits_in)
3797 modifier = NONE;
3798 else
3799 modifier = WIDEN;
3801 /* Multiple types in SLP are handled by creating the appropriate number of
3802 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3803 case of SLP. */
3804 if (slp_node || PURE_SLP_STMT (stmt_info))
3805 ncopies = 1;
3806 else if (modifier == NARROW)
3807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3808 else
3809 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3811 /* Sanity check: make sure that at least one copy of the vectorized stmt
3812 needs to be generated. */
3813 gcc_assert (ncopies >= 1);
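/* As a worked instance of the arithmetic above (figures purely
   illustrative, not tied to any particular target): with VF = 16 and
   128-bit vectors, narrowing int to short uses vectype_in = V4SI and
   vectype_out = V8HI, so ncopies = 16 / 8 = 2; the widening short to
   int conversion has vectype_in = V8HI and vectype_out = V4SI, and
   likewise needs ncopies = 16 / 8 = 2 copies of the vectorized stmt.  */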
3815 /* Supportable by target? */
3816 switch (modifier)
3818 case NONE:
3819 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3820 return false;
3821 if (supportable_convert_operation (code, vectype_out, vectype_in,
3822 &decl1, &code1))
3823 break;
3824 /* FALLTHRU */
3825 unsupported:
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 "conversion not supported by target.\n");
3829 return false;
3831 case WIDEN:
3832 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3833 &code1, &code2, &multi_step_cvt,
3834 &interm_types))
3836 /* Binary widening operation can only be supported directly by the
3837 architecture. */
3838 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3839 break;
3842 if (code != FLOAT_EXPR
3843 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3844 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3845 goto unsupported;
3847 rhs_mode = TYPE_MODE (rhs_type);
3848 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3849 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3850 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3851 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3853 cvt_type
3854 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3855 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3856 if (cvt_type == NULL_TREE)
3857 goto unsupported;
3859 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3861 if (!supportable_convert_operation (code, vectype_out,
3862 cvt_type, &decl1, &codecvt1))
3863 goto unsupported;
3865 else if (!supportable_widening_operation (code, stmt, vectype_out,
3866 cvt_type, &codecvt1,
3867 &codecvt2, &multi_step_cvt,
3868 &interm_types))
3869 continue;
3870 else
3871 gcc_assert (multi_step_cvt == 0);
3873 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3874 vectype_in, &code1, &code2,
3875 &multi_step_cvt, &interm_types))
3876 break;
3879 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3880 goto unsupported;
3882 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3883 codecvt2 = ERROR_MARK;
3884 else
3886 multi_step_cvt++;
3887 interm_types.safe_push (cvt_type);
3888 cvt_type = NULL_TREE;
3890 break;
3892 case NARROW:
3893 gcc_assert (op_type == unary_op);
3894 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3895 &code1, &multi_step_cvt,
3896 &interm_types))
3897 break;
3899 if (code != FIX_TRUNC_EXPR
3900 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3901 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3902 goto unsupported;
3904 rhs_mode = TYPE_MODE (rhs_type);
3905 cvt_type
3906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3907 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3908 if (cvt_type == NULL_TREE)
3909 goto unsupported;
3910 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3911 &decl1, &codecvt1))
3912 goto unsupported;
3913 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3914 &code1, &multi_step_cvt,
3915 &interm_types))
3916 break;
3917 goto unsupported;
3919 default:
3920 gcc_unreachable ();
3923 if (!vec_stmt) /* transformation not required. */
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_NOTE, vect_location,
3927 "=== vectorizable_conversion ===\n");
3928 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3930 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3931 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3933 else if (modifier == NARROW)
3935 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3936 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3938 else
3940 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3941 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3943 interm_types.release ();
3944 return true;
3947 /** Transform. **/
3948 if (dump_enabled_p ())
3949 dump_printf_loc (MSG_NOTE, vect_location,
3950 "transform conversion. ncopies = %d.\n", ncopies);
3952 if (op_type == binary_op)
3954 if (CONSTANT_CLASS_P (op0))
3955 op0 = fold_convert (TREE_TYPE (op1), op0);
3956 else if (CONSTANT_CLASS_P (op1))
3957 op1 = fold_convert (TREE_TYPE (op0), op1);
3960 /* In case of multi-step conversion, we first generate conversion operations
3961 to the intermediate types, and then from those types to the final one.
3962 We create vector destinations for the intermediate type (TYPES) received
3963 from supportable_*_operation, and store them in the correct order
3964 for future use in vect_create_vectorized_*_stmts (). */
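/* A hedged sketch of the multi-step WIDEN case (types chosen only for
   illustration): for

     short s;
     double d = (double) s;   FLOAT_EXPR, short -> double

   a target without a direct short -> double vector conversion may first
   widen the integer elements (short -> int, the intermediate CVT_TYPE)
   and only then apply the int -> double FLOAT conversion, so an extra
   vector destination of the intermediate type is pushed here as well.  */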
3965 vec_dsts.create (multi_step_cvt + 1);
3966 vec_dest = vect_create_destination_var (scalar_dest,
3967 (cvt_type && modifier == WIDEN)
3968 ? cvt_type : vectype_out);
3969 vec_dsts.quick_push (vec_dest);
3971 if (multi_step_cvt)
3973 for (i = interm_types.length () - 1;
3974 interm_types.iterate (i, &intermediate_type); i--)
3976 vec_dest = vect_create_destination_var (scalar_dest,
3977 intermediate_type);
3978 vec_dsts.quick_push (vec_dest);
3982 if (cvt_type)
3983 vec_dest = vect_create_destination_var (scalar_dest,
3984 modifier == WIDEN
3985 ? vectype_out : cvt_type);
3987 if (!slp_node)
3989 if (modifier == WIDEN)
3991 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3992 if (op_type == binary_op)
3993 vec_oprnds1.create (1);
3995 else if (modifier == NARROW)
3996 vec_oprnds0.create (
3997 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3999 else if (code == WIDEN_LSHIFT_EXPR)
4000 vec_oprnds1.create (slp_node->vec_stmts_size);
4002 last_oprnd = op0;
4003 prev_stmt_info = NULL;
4004 switch (modifier)
4006 case NONE:
4007 for (j = 0; j < ncopies; j++)
4009 if (j == 0)
4010 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4011 -1);
4012 else
4013 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4015 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4017 /* Arguments are ready. Create the new vector stmt. */
4018 if (code1 == CALL_EXPR)
4020 new_stmt = gimple_build_call (decl1, 1, vop0);
4021 new_temp = make_ssa_name (vec_dest, new_stmt);
4022 gimple_call_set_lhs (new_stmt, new_temp);
4024 else
4026 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4027 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4028 new_temp = make_ssa_name (vec_dest, new_stmt);
4029 gimple_assign_set_lhs (new_stmt, new_temp);
4032 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4033 if (slp_node)
4034 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4035 else
4037 if (!prev_stmt_info)
4038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4039 else
4040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4041 prev_stmt_info = vinfo_for_stmt (new_stmt);
4045 break;
4047 case WIDEN:
4048 /* In case the vectorization factor (VF) is bigger than the number
4049 of elements that we can fit in a vectype (nunits), we have to
4050 generate more than one vector stmt - i.e. - we need to "unroll"
4051 the vector stmt by a factor VF/nunits. */
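/* A minimal sketch of one widening copy (types and codes illustrative;
   some targets use even/odd rather than lo/hi variants): widening V8HI
   inputs to V4SI outputs turns each input vector vx into two stmts,

     vlo = VEC_UNPACK_LO_EXPR <vx>;
     vhi = VEC_UNPACK_HI_EXPR <vx>;

   and vect_create_vectorized_promotion_stmts below pushes both results
   so the next step (or the final copy) can consume them.  */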
4052 for (j = 0; j < ncopies; j++)
4054 /* Handle uses. */
4055 if (j == 0)
4057 if (slp_node)
4059 if (code == WIDEN_LSHIFT_EXPR)
4061 unsigned int k;
4063 vec_oprnd1 = op1;
4064 /* Store vec_oprnd1 for every vector stmt to be created
4065 for SLP_NODE. We check during the analysis that all
4066 the shift arguments are the same. */
4067 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4068 vec_oprnds1.quick_push (vec_oprnd1);
4070 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4071 slp_node, -1);
4073 else
4074 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4075 &vec_oprnds1, slp_node, -1);
4077 else
4079 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4080 vec_oprnds0.quick_push (vec_oprnd0);
4081 if (op_type == binary_op)
4083 if (code == WIDEN_LSHIFT_EXPR)
4084 vec_oprnd1 = op1;
4085 else
4086 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4087 vec_oprnds1.quick_push (vec_oprnd1);
4091 else
4093 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4094 vec_oprnds0.truncate (0);
4095 vec_oprnds0.quick_push (vec_oprnd0);
4096 if (op_type == binary_op)
4098 if (code == WIDEN_LSHIFT_EXPR)
4099 vec_oprnd1 = op1;
4100 else
4101 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4102 vec_oprnd1);
4103 vec_oprnds1.truncate (0);
4104 vec_oprnds1.quick_push (vec_oprnd1);
4108 /* Arguments are ready. Create the new vector stmts. */
4109 for (i = multi_step_cvt; i >= 0; i--)
4111 tree this_dest = vec_dsts[i];
4112 enum tree_code c1 = code1, c2 = code2;
4113 if (i == 0 && codecvt2 != ERROR_MARK)
4115 c1 = codecvt1;
4116 c2 = codecvt2;
4118 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4119 &vec_oprnds1,
4120 stmt, this_dest, gsi,
4121 c1, c2, decl1, decl2,
4122 op_type);
4125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4127 if (cvt_type)
4129 if (codecvt1 == CALL_EXPR)
4131 new_stmt = gimple_build_call (decl1, 1, vop0);
4132 new_temp = make_ssa_name (vec_dest, new_stmt);
4133 gimple_call_set_lhs (new_stmt, new_temp);
4135 else
4137 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4138 new_temp = make_ssa_name (vec_dest);
4139 new_stmt = gimple_build_assign (new_temp, codecvt1,
4140 vop0);
4143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4145 else
4146 new_stmt = SSA_NAME_DEF_STMT (vop0);
4148 if (slp_node)
4149 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4150 else
4152 if (!prev_stmt_info)
4153 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4154 else
4155 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4156 prev_stmt_info = vinfo_for_stmt (new_stmt);
4161 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4162 break;
4164 case NARROW:
4165 /* In case the vectorization factor (VF) is bigger than the number
4166 of elements that we can fit in a vectype (nunits), we have to
4167 generate more than one vector stmt - i.e. - we need to "unroll"
4168 the vector stmt by a factor VF/nunits. */
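/* Conversely, a sketch of one narrowing copy (types again illustrative):
   converting int to short with V4SI inputs and V8HI outputs consumes two
   input vectors per result, roughly

     vshort = VEC_PACK_TRUNC_EXPR <vint0, vint1>;

   which is why vec_oprnds0 for the NARROW case is created with room for
   twice the number of multi-step operands.  */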
4169 for (j = 0; j < ncopies; j++)
4171 /* Handle uses. */
4172 if (slp_node)
4173 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4174 slp_node, -1);
4175 else
4177 vec_oprnds0.truncate (0);
4178 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4179 vect_pow2 (multi_step_cvt) - 1);
4182 /* Arguments are ready. Create the new vector stmts. */
4183 if (cvt_type)
4184 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4186 if (codecvt1 == CALL_EXPR)
4188 new_stmt = gimple_build_call (decl1, 1, vop0);
4189 new_temp = make_ssa_name (vec_dest, new_stmt);
4190 gimple_call_set_lhs (new_stmt, new_temp);
4192 else
4194 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4195 new_temp = make_ssa_name (vec_dest);
4196 new_stmt = gimple_build_assign (new_temp, codecvt1,
4197 vop0);
4200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4201 vec_oprnds0[i] = new_temp;
4204 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4205 stmt, vec_dsts, gsi,
4206 slp_node, code1,
4207 &prev_stmt_info);
4210 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4211 break;
4214 vec_oprnds0.release ();
4215 vec_oprnds1.release ();
4216 vec_dsts.release ();
4217 interm_types.release ();
4219 return true;
4223 /* Function vectorizable_assignment.
4225 Check if STMT performs an assignment (copy) that can be vectorized.
4226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4227 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4228 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4230 static bool
4231 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4232 gimple **vec_stmt, slp_tree slp_node)
4234 tree vec_dest;
4235 tree scalar_dest;
4236 tree op;
4237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4239 tree new_temp;
4240 gimple *def_stmt;
4241 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4242 int ncopies;
4243 int i, j;
4244 vec<tree> vec_oprnds = vNULL;
4245 tree vop;
4246 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4247 vec_info *vinfo = stmt_info->vinfo;
4248 gimple *new_stmt = NULL;
4249 stmt_vec_info prev_stmt_info = NULL;
4250 enum tree_code code;
4251 tree vectype_in;
4253 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4254 return false;
4256 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4257 && ! vec_stmt)
4258 return false;
4260 /* Is vectorizable assignment? */
4261 if (!is_gimple_assign (stmt))
4262 return false;
4264 scalar_dest = gimple_assign_lhs (stmt);
4265 if (TREE_CODE (scalar_dest) != SSA_NAME)
4266 return false;
4268 code = gimple_assign_rhs_code (stmt);
4269 if (gimple_assign_single_p (stmt)
4270 || code == PAREN_EXPR
4271 || CONVERT_EXPR_CODE_P (code))
4272 op = gimple_assign_rhs1 (stmt);
4273 else
4274 return false;
4276 if (code == VIEW_CONVERT_EXPR)
4277 op = TREE_OPERAND (op, 0);
4279 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4280 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4282 /* Multiple types in SLP are handled by creating the appropriate number of
4283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4284 case of SLP. */
4285 if (slp_node || PURE_SLP_STMT (stmt_info))
4286 ncopies = 1;
4287 else
4288 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4290 gcc_assert (ncopies >= 1);
4292 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "use not simple.\n");
4297 return false;
4300 /* We can handle NOP_EXPR conversions that do not change the number
4301 of elements or the vector size. */
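/* For example (a sketch, not tied to a particular target): a cast from
   int to unsigned int keeps both the element count and the vector size
   (V4SI on both sides), so it is accepted here and vectorized below as
   a simple VIEW_CONVERT_EXPR copy:

     vect_u = VIEW_CONVERT_EXPR <vector(4) unsigned int> (vect_i);  */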
4302 if ((CONVERT_EXPR_CODE_P (code)
4303 || code == VIEW_CONVERT_EXPR)
4304 && (!vectype_in
4305 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4306 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4307 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4308 return false;
4310 /* We do not handle bit-precision changes. */
4311 if ((CONVERT_EXPR_CODE_P (code)
4312 || code == VIEW_CONVERT_EXPR)
4313 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4314 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4315 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4316 || ((TYPE_PRECISION (TREE_TYPE (op))
4317 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4318 /* But a conversion that does not change the bit-pattern is ok. */
4319 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4320 > TYPE_PRECISION (TREE_TYPE (op)))
4321 && TYPE_UNSIGNED (TREE_TYPE (op)))
4322 /* Conversion between boolean types of different sizes is
4323 a simple assignment when their vectypes are the same
4324 boolean vector type. */
4325 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4326 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "type conversion to/from bit-precision "
4331 "unsupported.\n");
4332 return false;
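/* As an illustration of the exception above (types hypothetical): zero
   extending a value of a 3-bit unsigned bit-field type to unsigned int
   does not change the bit pattern and is still treated as a copy, while
   truncating an int to that 3-bit type would need a real truncation and
   is rejected here.  */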
4335 if (!vec_stmt) /* transformation not required. */
4337 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_NOTE, vect_location,
4340 "=== vectorizable_assignment ===\n");
4341 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4342 return true;
4345 /** Transform. **/
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4349 /* Handle def. */
4350 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4352 /* Handle use. */
4353 for (j = 0; j < ncopies; j++)
4355 /* Handle uses. */
4356 if (j == 0)
4357 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4358 else
4359 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4361 /* Arguments are ready. Create the new vector stmt. */
4362 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4364 if (CONVERT_EXPR_CODE_P (code)
4365 || code == VIEW_CONVERT_EXPR)
4366 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4367 new_stmt = gimple_build_assign (vec_dest, vop);
4368 new_temp = make_ssa_name (vec_dest, new_stmt);
4369 gimple_assign_set_lhs (new_stmt, new_temp);
4370 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4371 if (slp_node)
4372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4375 if (slp_node)
4376 continue;
4378 if (j == 0)
4379 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4380 else
4381 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4383 prev_stmt_info = vinfo_for_stmt (new_stmt);
4386 vec_oprnds.release ();
4387 return true;
4391 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4392 either as shift by a scalar or by a vector. */
4394 bool
4395 vect_supportable_shift (enum tree_code code, tree scalar_type)
4398 machine_mode vec_mode;
4399 optab optab;
4400 int icode;
4401 tree vectype;
4403 vectype = get_vectype_for_scalar_type (scalar_type);
4404 if (!vectype)
4405 return false;
4407 optab = optab_for_tree_code (code, vectype, optab_scalar);
4408 if (!optab
4409 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4411 optab = optab_for_tree_code (code, vectype, optab_vector);
4412 if (!optab
4413 || (optab_handler (optab, TYPE_MODE (vectype))
4414 == CODE_FOR_nothing))
4415 return false;
4418 vec_mode = TYPE_MODE (vectype);
4419 icode = (int) optab_handler (optab, vec_mode);
4420 if (icode == CODE_FOR_nothing)
4421 return false;
4423 return true;
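/* A hypothetical caller might use the helper above to ask whether a
   shift can be vectorized at all before committing to a pattern, e.g.

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ... build the pattern using the shift ...

   where ITYPE stands for the scalar type of the shifted operand; the
   helper accepts the shift if either the vector/scalar or the
   vector/vector optab has a handler for the corresponding vector mode.  */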
4427 /* Function vectorizable_shift.
4429 Check if STMT performs a shift operation that can be vectorized.
4430 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4431 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4432 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4434 static bool
4435 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4436 gimple **vec_stmt, slp_tree slp_node)
4438 tree vec_dest;
4439 tree scalar_dest;
4440 tree op0, op1 = NULL;
4441 tree vec_oprnd1 = NULL_TREE;
4442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4443 tree vectype;
4444 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4445 enum tree_code code;
4446 machine_mode vec_mode;
4447 tree new_temp;
4448 optab optab;
4449 int icode;
4450 machine_mode optab_op2_mode;
4451 gimple *def_stmt;
4452 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4453 gimple *new_stmt = NULL;
4454 stmt_vec_info prev_stmt_info;
4455 int nunits_in;
4456 int nunits_out;
4457 tree vectype_out;
4458 tree op1_vectype;
4459 int ncopies;
4460 int j, i;
4461 vec<tree> vec_oprnds0 = vNULL;
4462 vec<tree> vec_oprnds1 = vNULL;
4463 tree vop0, vop1;
4464 unsigned int k;
4465 bool scalar_shift_arg = true;
4466 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4467 vec_info *vinfo = stmt_info->vinfo;
4468 int vf;
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4471 return false;
4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4474 && ! vec_stmt)
4475 return false;
4477 /* Is STMT a vectorizable binary/unary operation? */
4478 if (!is_gimple_assign (stmt))
4479 return false;
4481 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4482 return false;
4484 code = gimple_assign_rhs_code (stmt);
4486 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4487 || code == RROTATE_EXPR))
4488 return false;
4490 scalar_dest = gimple_assign_lhs (stmt);
4491 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4492 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4493 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4495 if (dump_enabled_p ())
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4497 "bit-precision shifts not supported.\n");
4498 return false;
4501 op0 = gimple_assign_rhs1 (stmt);
4502 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4506 "use not simple.\n");
4507 return false;
4509 /* If op0 is an external or constant def use a vector type with
4510 the same size as the output vector type. */
4511 if (!vectype)
4512 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4513 if (vec_stmt)
4514 gcc_assert (vectype);
4515 if (!vectype)
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4519 "no vectype for scalar type\n");
4520 return false;
4523 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4524 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4525 if (nunits_out != nunits_in)
4526 return false;
4528 op1 = gimple_assign_rhs2 (stmt);
4529 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4531 if (dump_enabled_p ())
4532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4533 "use not simple.\n");
4534 return false;
4537 if (loop_vinfo)
4538 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4539 else
4540 vf = 1;
4542 /* Multiple types in SLP are handled by creating the appropriate number of
4543 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4544 case of SLP. */
4545 if (slp_node || PURE_SLP_STMT (stmt_info))
4546 ncopies = 1;
4547 else
4548 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4550 gcc_assert (ncopies >= 1);
4552 /* Determine whether the shift amount is a vector or a scalar. If the
4553 shift/rotate amount is a vector, use the vector/vector shift optabs. */
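/* Two illustrative scalar loops (example code only):

     for (i = 0; i < n; i++) a[i] = b[i] << c[i];   amount varies
     for (i = 0; i < n; i++) a[i] = b[i] << s;      amount invariant

   The first needs the vector/vector shift optab; the second can use the
   vector/scalar optab and keep S as a scalar operand.  */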
4555 if ((dt[1] == vect_internal_def
4556 || dt[1] == vect_induction_def)
4557 && !slp_node)
4558 scalar_shift_arg = false;
4559 else if (dt[1] == vect_constant_def
4560 || dt[1] == vect_external_def
4561 || dt[1] == vect_internal_def)
4563 /* In SLP, we need to check whether the shift count is the same
4564 for all statements; in loops, if it is a constant or invariant,
4565 it is always a scalar shift. */
4566 if (slp_node)
4568 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4569 gimple *slpstmt;
4571 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4572 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4573 scalar_shift_arg = false;
4576 else
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "operand mode requires invariant argument.\n");
4581 return false;
4584 /* Vector shifted by vector. */
4585 if (!scalar_shift_arg)
4587 optab = optab_for_tree_code (code, vectype, optab_vector);
4588 if (dump_enabled_p ())
4589 dump_printf_loc (MSG_NOTE, vect_location,
4590 "vector/vector shift/rotate found.\n");
4592 if (!op1_vectype)
4593 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4594 if (op1_vectype == NULL_TREE
4595 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4599 "unusable type for last operand in"
4600 " vector/vector shift/rotate.\n");
4601 return false;
4604 /* See if the machine has a vector shifted by scalar insn, and if not,
4605 then see if it has a vector shifted by vector insn. */
4606 else
4608 optab = optab_for_tree_code (code, vectype, optab_scalar);
4609 if (optab
4610 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_NOTE, vect_location,
4614 "vector/scalar shift/rotate found.\n");
4616 else
4618 optab = optab_for_tree_code (code, vectype, optab_vector);
4619 if (optab
4620 && (optab_handler (optab, TYPE_MODE (vectype))
4621 != CODE_FOR_nothing))
4623 scalar_shift_arg = false;
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_NOTE, vect_location,
4627 "vector/vector shift/rotate found.\n");
4629 /* Unlike the other binary operators, shifts/rotates have
4630 the rhs being int, instead of the same type as the lhs,
4631 so make sure the scalar is the right type if we are
4632 dealing with vectors of long long/long/short/char. */
4633 if (dt[1] == vect_constant_def)
4634 op1 = fold_convert (TREE_TYPE (vectype), op1);
4635 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4636 TREE_TYPE (op1)))
4638 if (slp_node
4639 && TYPE_MODE (TREE_TYPE (vectype))
4640 != TYPE_MODE (TREE_TYPE (op1)))
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "unusable type for last operand in"
4645 " vector/vector shift/rotate.\n");
4646 return false;
4648 if (vec_stmt && !slp_node)
4650 op1 = fold_convert (TREE_TYPE (vectype), op1);
4651 op1 = vect_init_vector (stmt, op1,
4652 TREE_TYPE (vectype), NULL);
4659 /* Supportable by target? */
4660 if (!optab)
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4664 "no optab.\n");
4665 return false;
4667 vec_mode = TYPE_MODE (vectype);
4668 icode = (int) optab_handler (optab, vec_mode);
4669 if (icode == CODE_FOR_nothing)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "op not supported by target.\n");
4674 /* Check only during analysis. */
4675 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4676 || (vf < vect_min_worthwhile_factor (code)
4677 && !vec_stmt))
4678 return false;
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE, vect_location,
4681 "proceeding using word mode.\n");
4684 /* Worthwhile without SIMD support? Check only during analysis. */
4685 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4686 && vf < vect_min_worthwhile_factor (code)
4687 && !vec_stmt)
4689 if (dump_enabled_p ())
4690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4691 "not worthwhile without SIMD support.\n");
4692 return false;
4695 if (!vec_stmt) /* transformation not required. */
4697 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4698 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_NOTE, vect_location,
4700 "=== vectorizable_shift ===\n");
4701 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4702 return true;
4705 /** Transform. **/
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_NOTE, vect_location,
4709 "transform binary/unary operation.\n");
4711 /* Handle def. */
4712 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4714 prev_stmt_info = NULL;
4715 for (j = 0; j < ncopies; j++)
4717 /* Handle uses. */
4718 if (j == 0)
4720 if (scalar_shift_arg)
4722 /* Vector shl and shr insn patterns can be defined with scalar
4723 operand 2 (shift operand). In this case, use constant or loop
4724 invariant op1 directly, without extending it to vector mode
4725 first. */
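/* For instance (illustrative only): on a target whose shift patterns
   accept a scalar shift count, a loop like

     for (i = 0; i < n; i++) a[i] = b[i] << 3;

   keeps the constant 3 as operand 2 of each vector shift stmt, instead
   of first splatting it into a vector of 3s.  */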
4726 optab_op2_mode = insn_data[icode].operand[2].mode;
4727 if (!VECTOR_MODE_P (optab_op2_mode))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_NOTE, vect_location,
4731 "operand 1 using scalar mode.\n");
4732 vec_oprnd1 = op1;
4733 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4734 vec_oprnds1.quick_push (vec_oprnd1);
4735 if (slp_node)
4737 /* Store vec_oprnd1 for every vector stmt to be created
4738 for SLP_NODE. We check during the analysis that all
4739 the shift arguments are the same.
4740 TODO: Allow different constants for different vector
4741 stmts generated for an SLP instance. */
4742 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4743 vec_oprnds1.quick_push (vec_oprnd1);
4748 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4749 (a special case for certain kinds of vector shifts); otherwise,
4750 operand 1 should be of a vector type (the usual case). */
4751 if (vec_oprnd1)
4752 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4753 slp_node, -1);
4754 else
4755 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4756 slp_node, -1);
4758 else
4759 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4761 /* Arguments are ready. Create the new vector stmt. */
4762 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4764 vop1 = vec_oprnds1[i];
4765 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4766 new_temp = make_ssa_name (vec_dest, new_stmt);
4767 gimple_assign_set_lhs (new_stmt, new_temp);
4768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4769 if (slp_node)
4770 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4773 if (slp_node)
4774 continue;
4776 if (j == 0)
4777 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4778 else
4779 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4780 prev_stmt_info = vinfo_for_stmt (new_stmt);
4783 vec_oprnds0.release ();
4784 vec_oprnds1.release ();
4786 return true;
4790 /* Function vectorizable_operation.
4792 Check if STMT performs a binary, unary or ternary operation that can
4793 be vectorized.
4794 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4795 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4796 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4798 static bool
4799 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4800 gimple **vec_stmt, slp_tree slp_node)
4802 tree vec_dest;
4803 tree scalar_dest;
4804 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4805 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4806 tree vectype;
4807 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4808 enum tree_code code;
4809 machine_mode vec_mode;
4810 tree new_temp;
4811 int op_type;
4812 optab optab;
4813 bool target_support_p;
4814 gimple *def_stmt;
4815 enum vect_def_type dt[3]
4816 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4817 gimple *new_stmt = NULL;
4818 stmt_vec_info prev_stmt_info;
4819 int nunits_in;
4820 int nunits_out;
4821 tree vectype_out;
4822 int ncopies;
4823 int j, i;
4824 vec<tree> vec_oprnds0 = vNULL;
4825 vec<tree> vec_oprnds1 = vNULL;
4826 vec<tree> vec_oprnds2 = vNULL;
4827 tree vop0, vop1, vop2;
4828 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4829 vec_info *vinfo = stmt_info->vinfo;
4830 int vf;
4832 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4833 return false;
4835 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4836 && ! vec_stmt)
4837 return false;
4839 /* Is STMT a vectorizable binary/unary operation? */
4840 if (!is_gimple_assign (stmt))
4841 return false;
4843 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4844 return false;
4846 code = gimple_assign_rhs_code (stmt);
4848 /* For pointer addition, we should use the normal plus for
4849 the vector addition. */
4850 if (code == POINTER_PLUS_EXPR)
4851 code = PLUS_EXPR;
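/* E.g. (a small sketch): for the scalar stmt

     p = q + n;   POINTER_PLUS_EXPR

   the vectorized stmt is built with an ordinary PLUS_EXPR, since the
   vector addition does not distinguish pointer elements from integer
   elements.  */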
4853 /* Support only unary, binary or ternary operations. */
4854 op_type = TREE_CODE_LENGTH (code);
4855 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4859 "num. args = %d (not unary/binary/ternary op).\n",
4860 op_type);
4861 return false;
4864 scalar_dest = gimple_assign_lhs (stmt);
4865 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4867 /* Most operations cannot handle bit-precision types without extra
4868 truncations. */
4869 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4870 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4871 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4872 /* Exception are bitwise binary operations. */
4873 && code != BIT_IOR_EXPR
4874 && code != BIT_XOR_EXPR
4875 && code != BIT_AND_EXPR)
4877 if (dump_enabled_p ())
4878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4879 "bit-precision arithmetic not supported.\n");
4880 return false;
4883 op0 = gimple_assign_rhs1 (stmt);
4884 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4886 if (dump_enabled_p ())
4887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4888 "use not simple.\n");
4889 return false;
4891 /* If op0 is an external or constant def use a vector type with
4892 the same size as the output vector type. */
4893 if (!vectype)
4895 /* For a boolean type we cannot determine the vectype from an
4896 invariant value (we don't know whether it is a vector
4897 of booleans or a vector of integers). We use the output
4898 vectype because operations on booleans don't change the
4899 type. */
4900 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4902 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4904 if (dump_enabled_p ())
4905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4906 "not supported operation on bool value.\n");
4907 return false;
4909 vectype = vectype_out;
4911 else
4912 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4914 if (vec_stmt)
4915 gcc_assert (vectype);
4916 if (!vectype)
4918 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921 "no vectype for scalar type ");
4922 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4923 TREE_TYPE (op0));
4924 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4927 return false;
4930 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4931 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4932 if (nunits_out != nunits_in)
4933 return false;
4935 if (op_type == binary_op || op_type == ternary_op)
4937 op1 = gimple_assign_rhs2 (stmt);
4938 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4942 "use not simple.\n");
4943 return false;
4946 if (op_type == ternary_op)
4948 op2 = gimple_assign_rhs3 (stmt);
4949 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4951 if (dump_enabled_p ())
4952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4953 "use not simple.\n");
4954 return false;
4958 if (loop_vinfo)
4959 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4960 else
4961 vf = 1;
4963 /* Multiple types in SLP are handled by creating the appropriate number of
4964 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4965 case of SLP. */
4966 if (slp_node || PURE_SLP_STMT (stmt_info))
4967 ncopies = 1;
4968 else
4969 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4971 gcc_assert (ncopies >= 1);
4973 /* Shifts are handled in vectorizable_shift (). */
4974 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4975 || code == RROTATE_EXPR)
4976 return false;
4978 /* Supportable by target? */
4980 vec_mode = TYPE_MODE (vectype);
4981 if (code == MULT_HIGHPART_EXPR)
4982 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4983 else
4985 optab = optab_for_tree_code (code, vectype, optab_default);
4986 if (!optab)
4988 if (dump_enabled_p ())
4989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4990 "no optab.\n");
4991 return false;
4993 target_support_p = (optab_handler (optab, vec_mode)
4994 != CODE_FOR_nothing);
4997 if (!target_support_p)
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5001 "op not supported by target.\n");
5002 /* Check only during analysis. */
5003 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5004 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5005 return false;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "proceeding using word mode.\n");
5011 /* Worthwhile without SIMD support? Check only during analysis. */
5012 if (!VECTOR_MODE_P (vec_mode)
5013 && !vec_stmt
5014 && vf < vect_min_worthwhile_factor (code))
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5018 "not worthwhile without SIMD support.\n");
5019 return false;
5022 if (!vec_stmt) /* transformation not required. */
5024 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_NOTE, vect_location,
5027 "=== vectorizable_operation ===\n");
5028 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5029 return true;
5032 /** Transform. **/
5034 if (dump_enabled_p ())
5035 dump_printf_loc (MSG_NOTE, vect_location,
5036 "transform binary/unary operation.\n");
5038 /* Handle def. */
5039 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5041 /* In case the vectorization factor (VF) is bigger than the number
5042 of elements that we can fit in a vectype (nunits), we have to generate
5043 more than one vector stmt - i.e. - we need to "unroll" the
5044 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5045 from one copy of the vector stmt to the next, in the field
5046 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5047 stages to find the correct vector defs to be used when vectorizing
5048 stmts that use the defs of the current stmt. The example below
5049 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5050 we need to create 4 vectorized stmts):
5052 before vectorization:
5053 RELATED_STMT VEC_STMT
5054 S1: x = memref - -
5055 S2: z = x + 1 - -
5057 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5058 there):
5059 RELATED_STMT VEC_STMT
5060 VS1_0: vx0 = memref0 VS1_1 -
5061 VS1_1: vx1 = memref1 VS1_2 -
5062 VS1_2: vx2 = memref2 VS1_3 -
5063 VS1_3: vx3 = memref3 - -
5064 S1: x = load - VS1_0
5065 S2: z = x + 1 - -
5067 step2: vectorize stmt S2 (done here):
5068 To vectorize stmt S2 we first need to find the relevant vector
5069 def for the first operand 'x'. This is, as usual, obtained from
5070 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5071 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5072 relevant vector def 'vx0'. Having found 'vx0' we can generate
5073 the vector stmt VS2_0, and as usual, record it in the
5074 STMT_VINFO_VEC_STMT of stmt S2.
5075 When creating the second copy (VS2_1), we obtain the relevant vector
5076 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5077 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5078 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5079 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5080 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5081 chain of stmts and pointers:
5082 RELATED_STMT VEC_STMT
5083 VS1_0: vx0 = memref0 VS1_1 -
5084 VS1_1: vx1 = memref1 VS1_2 -
5085 VS1_2: vx2 = memref2 VS1_3 -
5086 VS1_3: vx3 = memref3 - -
5087 S1: x = load - VS1_0
5088 VS2_0: vz0 = vx0 + v1 VS2_1 -
5089 VS2_1: vz1 = vx1 + v1 VS2_2 -
5090 VS2_2: vz2 = vx2 + v1 VS2_3 -
5091 VS2_3: vz3 = vx3 + v1 - -
5092 S2: z = x + 1 - VS2_0 */
5094 prev_stmt_info = NULL;
5095 for (j = 0; j < ncopies; j++)
5097 /* Handle uses. */
5098 if (j == 0)
5100 if (op_type == binary_op || op_type == ternary_op)
5101 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5102 slp_node, -1);
5103 else
5104 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5105 slp_node, -1);
5106 if (op_type == ternary_op)
5108 vec_oprnds2.create (1);
5109 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5110 stmt));
5113 else
5115 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5116 if (op_type == ternary_op)
5118 tree vec_oprnd = vec_oprnds2.pop ();
5119 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5120 vec_oprnd));
5124 /* Arguments are ready. Create the new vector stmt. */
5125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5127 vop1 = ((op_type == binary_op || op_type == ternary_op)
5128 ? vec_oprnds1[i] : NULL_TREE);
5129 vop2 = ((op_type == ternary_op)
5130 ? vec_oprnds2[i] : NULL_TREE);
5131 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5132 new_temp = make_ssa_name (vec_dest, new_stmt);
5133 gimple_assign_set_lhs (new_stmt, new_temp);
5134 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5135 if (slp_node)
5136 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5139 if (slp_node)
5140 continue;
5142 if (j == 0)
5143 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5144 else
5145 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5146 prev_stmt_info = vinfo_for_stmt (new_stmt);
5149 vec_oprnds0.release ();
5150 vec_oprnds1.release ();
5151 vec_oprnds2.release ();
5153 return true;
5156 /* A helper function to ensure data reference DR's base alignment
5157 for STMT_INFO. */
5159 static void
5160 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5162 if (!dr->aux)
5163 return;
5165 if (DR_VECT_AUX (dr)->base_misaligned)
5167 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5168 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5170 if (decl_in_symtab_p (base_decl))
5171 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5172 else
5174 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5175 DECL_USER_ALIGN (base_decl) = 1;
5177 DR_VECT_AUX (dr)->base_misaligned = false;
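/* For instance (illustrative decl and numbers): if a static array of
   doubles is only 8-byte aligned but the chosen vectype prefers 16-byte
   alignment, its alignment is raised here, either directly on the decl
   or through the symbol table node, so that aligned vector accesses can
   be generated for it.  */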
5182 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
5183 reversal of the vector elements. If that is impossible to do,
5184 return NULL. */
5186 static tree
5187 perm_mask_for_reverse (tree vectype)
5189 int i, nunits;
5190 unsigned char *sel;
5192 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5193 sel = XALLOCAVEC (unsigned char, nunits);
5195 for (i = 0; i < nunits; ++i)
5196 sel[i] = nunits - 1 - i;
5198 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5199 return NULL_TREE;
5200 return vect_gen_perm_mask_checked (vectype, sel);
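/* For a V4SI vector, for example, the selector built above is
   { 3, 2, 1, 0 }, i.e. (assuming the target can permute V4SI)

     reversed = VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>;

   which the negative-step handling in vectorizable_store below relies
   on.  */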
5203 /* Function vectorizable_store.
5205 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5206 can be vectorized.
5207 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5208 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5211 static bool
5212 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5213 slp_tree slp_node)
5215 tree scalar_dest;
5216 tree data_ref;
5217 tree op;
5218 tree vec_oprnd = NULL_TREE;
5219 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5220 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5221 tree elem_type;
5222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5223 struct loop *loop = NULL;
5224 machine_mode vec_mode;
5225 tree dummy;
5226 enum dr_alignment_support alignment_support_scheme;
5227 gimple *def_stmt;
5228 enum vect_def_type dt;
5229 stmt_vec_info prev_stmt_info = NULL;
5230 tree dataref_ptr = NULL_TREE;
5231 tree dataref_offset = NULL_TREE;
5232 gimple *ptr_incr = NULL;
5233 int ncopies;
5234 int j;
5235 gimple *next_stmt, *first_stmt = NULL;
5236 bool grouped_store = false;
5237 bool store_lanes_p = false;
5238 unsigned int group_size, i;
5239 vec<tree> dr_chain = vNULL;
5240 vec<tree> oprnds = vNULL;
5241 vec<tree> result_chain = vNULL;
5242 bool inv_p;
5243 bool negative = false;
5244 tree offset = NULL_TREE;
5245 vec<tree> vec_oprnds = vNULL;
5246 bool slp = (slp_node != NULL);
5247 unsigned int vec_num;
5248 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5249 vec_info *vinfo = stmt_info->vinfo;
5250 tree aggr_type;
5251 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5252 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5253 int scatter_scale = 1;
5254 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5255 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5256 gimple *new_stmt;
5258 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5259 return false;
5261 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5262 && ! vec_stmt)
5263 return false;
5265 /* Is vectorizable store? */
5267 if (!is_gimple_assign (stmt))
5268 return false;
5270 scalar_dest = gimple_assign_lhs (stmt);
5271 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5272 && is_pattern_stmt_p (stmt_info))
5273 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5274 if (TREE_CODE (scalar_dest) != ARRAY_REF
5275 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5276 && TREE_CODE (scalar_dest) != INDIRECT_REF
5277 && TREE_CODE (scalar_dest) != COMPONENT_REF
5278 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5279 && TREE_CODE (scalar_dest) != REALPART_EXPR
5280 && TREE_CODE (scalar_dest) != MEM_REF)
5281 return false;
5283 gcc_assert (gimple_assign_single_p (stmt));
5285 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5286 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5288 if (loop_vinfo)
5289 loop = LOOP_VINFO_LOOP (loop_vinfo);
5291 /* Multiple types in SLP are handled by creating the appropriate number of
5292 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5293 case of SLP. */
5294 if (slp || PURE_SLP_STMT (stmt_info))
5295 ncopies = 1;
5296 else
5297 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5299 gcc_assert (ncopies >= 1);
5301 /* FORNOW. This restriction should be relaxed. */
5302 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5306 "multiple types in nested loop.\n");
5307 return false;
5310 op = gimple_assign_rhs1 (stmt);
5311 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5313 if (dump_enabled_p ())
5314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5315 "use not simple.\n");
5316 return false;
5319 elem_type = TREE_TYPE (vectype);
5320 vec_mode = TYPE_MODE (vectype);
5322 /* FORNOW. In some cases we can vectorize even if the data-type is not
5323 supported (e.g. array initialization with 0). */
5324 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5325 return false;
5327 if (!STMT_VINFO_DATA_REF (stmt_info))
5328 return false;
5330 if (!STMT_VINFO_STRIDED_P (stmt_info))
5332 negative =
5333 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5334 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5335 size_zero_node) < 0;
5336 if (negative && ncopies > 1)
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5340 "multiple types with negative step.\n");
5341 return false;
5343 if (negative)
5345 gcc_assert (!grouped_store);
5346 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5347 if (alignment_support_scheme != dr_aligned
5348 && alignment_support_scheme != dr_unaligned_supported)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5352 "negative step but alignment required.\n");
5353 return false;
5355 if (dt != vect_constant_def
5356 && dt != vect_external_def
5357 && !perm_mask_for_reverse (vectype))
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5361 "negative step and reversing not supported.\n");
5362 return false;
5367 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5369 grouped_store = true;
5370 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5371 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5372 if (!slp
5373 && !PURE_SLP_STMT (stmt_info)
5374 && !STMT_VINFO_STRIDED_P (stmt_info))
5376 if (vect_store_lanes_supported (vectype, group_size))
5377 store_lanes_p = true;
5378 else if (!vect_grouped_store_supported (vectype, group_size))
5379 return false;
5382 if (STMT_VINFO_STRIDED_P (stmt_info)
5383 && (slp || PURE_SLP_STMT (stmt_info))
5384 && (group_size > nunits
5385 || nunits % group_size != 0))
5387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5388 "unhandled strided group store\n");
5389 return false;
5392 if (first_stmt == stmt)
5394 /* STMT is the leader of the group. Check the operands of all the
5395 stmts of the group. */
5396 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5397 while (next_stmt)
5399 gcc_assert (gimple_assign_single_p (next_stmt));
5400 op = gimple_assign_rhs1 (next_stmt);
5401 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5405 "use not simple.\n");
5406 return false;
5408 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5413 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5415 gimple *def_stmt;
5416 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5417 &scatter_off, &scatter_scale);
5418 gcc_assert (scatter_decl);
5419 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5420 &scatter_off_vectype))
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5424 "scatter index use not simple.");
5425 return false;
5429 if (!vec_stmt) /* transformation not required. */
5431 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5432 /* The SLP costs are calculated during SLP analysis. */
5433 if (!PURE_SLP_STMT (stmt_info))
5434 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5435 NULL, NULL, NULL);
5436 return true;
5439 /** Transform. **/
5441 ensure_base_align (stmt_info, dr);
5443 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5445 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5446 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5447 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5448 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5449 edge pe = loop_preheader_edge (loop);
5450 gimple_seq seq;
5451 basic_block new_bb;
5452 enum { NARROW, NONE, WIDEN } modifier;
5453 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5455 if (nunits == (unsigned int) scatter_off_nunits)
5456 modifier = NONE;
5457 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5459 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5460 modifier = WIDEN;
5462 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5463 sel[i] = i | nunits;
5465 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5466 gcc_assert (perm_mask != NULL_TREE);
5468 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5470 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5471 modifier = NARROW;
5473 for (i = 0; i < (unsigned int) nunits; ++i)
5474 sel[i] = i | scatter_off_nunits;
5476 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5477 gcc_assert (perm_mask != NULL_TREE);
5478 ncopies *= 2;
5480 else
5481 gcc_unreachable ();
5483 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5484 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5485 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5486 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5487 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5488 scaletype = TREE_VALUE (arglist);
5490 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5491 && TREE_CODE (rettype) == VOID_TYPE);
5493 ptr = fold_convert (ptrtype, scatter_base);
5494 if (!is_gimple_min_invariant (ptr))
5496 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5497 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5498 gcc_assert (!new_bb);
5501 /* Currently we support only unconditional scatter stores,
5502 so mask should be all ones. */
5503 mask = build_int_cst (masktype, -1);
5504 mask = vect_init_vector (stmt, mask, masktype, NULL);
5506 scale = build_int_cst (scaletype, scatter_scale);
5508 prev_stmt_info = NULL;
5509 for (j = 0; j < ncopies; ++j)
5511 if (j == 0)
5513 src = vec_oprnd1
5514 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5515 op = vec_oprnd0
5516 = vect_get_vec_def_for_operand (scatter_off, stmt);
5518 else if (modifier != NONE && (j & 1))
5520 if (modifier == WIDEN)
5522 src = vec_oprnd1
5523 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5524 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5525 stmt, gsi);
5527 else if (modifier == NARROW)
5529 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5530 stmt, gsi);
5531 op = vec_oprnd0
5532 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5534 else
5535 gcc_unreachable ();
5537 else
5539 src = vec_oprnd1
5540 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5541 op = vec_oprnd0
5542 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5545 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5547 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5548 == TYPE_VECTOR_SUBPARTS (srctype));
5549 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5550 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5551 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5553 src = var;
5556 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5558 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5559 == TYPE_VECTOR_SUBPARTS (idxtype));
5560 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5561 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5562 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5564 op = var;
5567 new_stmt
5568 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5570 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5572 if (prev_stmt_info == NULL)
5573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5574 else
5575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5576 prev_stmt_info = vinfo_for_stmt (new_stmt);
5578 return true;
5581 if (grouped_store)
5583 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5584 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5586 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5588 /* FORNOW */
5589 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5591 /* We vectorize all the stmts of the interleaving group when we
5592 reach the last stmt in the group. */
5593 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5594 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5595 && !slp)
5597 *vec_stmt = NULL;
5598 return true;
5601 if (slp)
5603 grouped_store = false;
5604 /* VEC_NUM is the number of vect stmts to be created for this
5605 group. */
5606 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5607 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5608 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5609 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5610 op = gimple_assign_rhs1 (first_stmt);
5612 else
5613 /* VEC_NUM is the number of vect stmts to be created for this
5614 group. */
5615 vec_num = group_size;
5617 else
5619 first_stmt = stmt;
5620 first_dr = dr;
5621 group_size = vec_num = 1;
5624 if (dump_enabled_p ())
5625 dump_printf_loc (MSG_NOTE, vect_location,
5626 "transform store. ncopies = %d\n", ncopies);
5628 if (STMT_VINFO_STRIDED_P (stmt_info))
5630 gimple_stmt_iterator incr_gsi;
5631 bool insert_after;
5632 gimple *incr;
5633 tree offvar;
5634 tree ivstep;
5635 tree running_off;
5636 gimple_seq stmts = NULL;
5637 tree stride_base, stride_step, alias_off;
5638 tree vec_oprnd;
5639 unsigned int g;
5641 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5643 stride_base
5644 = fold_build_pointer_plus
5645 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5646 size_binop (PLUS_EXPR,
5647 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5648 convert_to_ptrofftype (DR_INIT (first_dr))));
5649 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5651 /* For a store with loop-invariant (but other than power-of-2)
5652 stride (i.e. not a grouped access) like so:
5654 for (i = 0; i < n; i += stride)
5655 array[i] = ...;
5657 we generate a new induction variable and new stores from
5658 the components of the (vectorized) rhs:
5660 for (j = 0; ; j += VF*stride)
5661 vectemp = ...;
5662 tmp1 = vectemp[0];
5663 array[j] = tmp1;
5664 tmp2 = vectemp[1];
5665 array[j + stride] = tmp2;
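/* A concrete (purely illustrative, not from the original comment) instance
   of the transform above: with a V4SI vectype (nunits == 4) and stride == 3,
   one vector copy of the rhs is scattered by four scalar stores:

     for (j = 0; ; j += 4*3)
       vectemp = ...;
       array[j]     = vectemp[0];
       array[j + 3] = vectemp[1];
       array[j + 6] = vectemp[2];
       array[j + 9] = vectemp[3];

   i.e. NSTORES == nunits element stores, each bumping RUNNING_OFF by
   STRIDE_STEP.  */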
5669 unsigned nstores = nunits;
5670 tree ltype = elem_type;
5671 if (slp)
5673 nstores = nunits / group_size;
5674 if (group_size < nunits)
5675 ltype = build_vector_type (elem_type, group_size);
5676 else
5677 ltype = vectype;
5678 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5679 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5680 group_size = 1;
5683 ivstep = stride_step;
5684 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5685 build_int_cst (TREE_TYPE (ivstep),
5686 ncopies * nstores));
5688 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5690 create_iv (stride_base, ivstep, NULL,
5691 loop, &incr_gsi, insert_after,
5692 &offvar, NULL);
5693 incr = gsi_stmt (incr_gsi);
5694 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5696 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5697 if (stmts)
5698 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5700 prev_stmt_info = NULL;
5701 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5702 next_stmt = first_stmt;
5703 for (g = 0; g < group_size; g++)
5705 running_off = offvar;
5706 if (g)
5708 tree size = TYPE_SIZE_UNIT (ltype);
5709 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5710 size);
5711 tree newoff = copy_ssa_name (running_off, NULL);
5712 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5713 running_off, pos);
5714 vect_finish_stmt_generation (stmt, incr, gsi);
5715 running_off = newoff;
5717 for (j = 0; j < ncopies; j++)
5719 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5720 and first_stmt == stmt. */
5721 if (j == 0)
5723 if (slp)
5725 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5726 slp_node, -1);
5727 vec_oprnd = vec_oprnds[0];
5729 else
5731 gcc_assert (gimple_assign_single_p (next_stmt));
5732 op = gimple_assign_rhs1 (next_stmt);
5733 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5736 else
5738 if (slp)
5739 vec_oprnd = vec_oprnds[j];
5740 else
5742 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5743 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5747 for (i = 0; i < nstores; i++)
5749 tree newref, newoff;
5750 gimple *incr, *assign;
5751 tree size = TYPE_SIZE (ltype);
5752 /* Extract the i'th component. */
5753 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5754 bitsize_int (i), size);
5755 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5756 size, pos);
5758 elem = force_gimple_operand_gsi (gsi, elem, true,
5759 NULL_TREE, true,
5760 GSI_SAME_STMT);
5762 newref = build2 (MEM_REF, ltype,
5763 running_off, alias_off);
5765 /* And store it to *running_off. */
5766 assign = gimple_build_assign (newref, elem);
5767 vect_finish_stmt_generation (stmt, assign, gsi);
5769 newoff = copy_ssa_name (running_off, NULL);
5770 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5771 running_off, stride_step);
5772 vect_finish_stmt_generation (stmt, incr, gsi);
5774 running_off = newoff;
5775 if (g == group_size - 1
5776 && !slp)
5778 if (j == 0 && i == 0)
5779 STMT_VINFO_VEC_STMT (stmt_info)
5780 = *vec_stmt = assign;
5781 else
5782 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5783 prev_stmt_info = vinfo_for_stmt (assign);
5787 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5789 return true;
5792 dr_chain.create (group_size);
5793 oprnds.create (group_size);
5795 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5796 gcc_assert (alignment_support_scheme);
5797 /* Targets with store-lane instructions must not require explicit
5798 realignment. */
5799 gcc_assert (!store_lanes_p
5800 || alignment_support_scheme == dr_aligned
5801 || alignment_support_scheme == dr_unaligned_supported);
5803 if (negative)
5804 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5806 if (store_lanes_p)
5807 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5808 else
5809 aggr_type = vectype;
5811 /* In case the vectorization factor (VF) is bigger than the number
5812 of elements that we can fit in a vectype (nunits), we have to generate
5813 more than one vector stmt - i.e. - we need to "unroll" the
5814 vector stmt by a factor VF/nunits. For more details see documentation in
5815 vect_get_vec_def_for_copy_stmt. */
5817 /* In case of interleaving (non-unit grouped access):
5819 S1: &base + 2 = x2
5820 S2: &base = x0
5821 S3: &base + 1 = x1
5822 S4: &base + 3 = x3
5824 We create vectorized stores starting from base address (the access of the
5825 first stmt in the chain (S2 in the above example), when the last store stmt
5826 of the chain (S4) is reached:
5828 VS1: &base = vx2
5829 VS2: &base + vec_size*1 = vx0
5830 VS3: &base + vec_size*2 = vx1
5831 VS4: &base + vec_size*3 = vx3
5833 Then permutation statements are generated:
5835 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5836 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5839 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5840 (the order of the data-refs in the output of vect_permute_store_chain
5841 corresponds to the order of scalar stmts in the interleaving chain - see
5842 the documentation of vect_permute_store_chain()).
5844 In case of both multiple types and interleaving, above vector stores and
5845 permutation stmts are created for every copy. The result vector stmts are
5846 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5847 STMT_VINFO_RELATED_STMT for the next copies.
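/* An illustrative (made-up) instance of the permutation step above, using a
   group of two V4SI stores whose vectorized defs are vx0 = {a0,a1,a2,a3} and
   vx1 = {b0,b1,b2,b3}:

     VS5: vx5 = VEC_PERM_EXPR <vx0, vx1, {0, 4, 1, 5}>   i.e. {a0,b0,a1,b1}
     VS6: vx6 = VEC_PERM_EXPR <vx0, vx1, {2, 6, 3, 7}>   i.e. {a2,b2,a3,b3}

   so that vx5 and vx6 together hold the group's elements in memory order.  */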
5850 prev_stmt_info = NULL;
5851 for (j = 0; j < ncopies; j++)
5854 if (j == 0)
5856 if (slp)
5858 /* Get vectorized arguments for SLP_NODE. */
5859 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5860 NULL, slp_node, -1);
5862 vec_oprnd = vec_oprnds[0];
5864 else
5866 /* For interleaved stores we collect vectorized defs for all the
5867 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5868 used as an input to vect_permute_store_chain(), and OPRNDS as
5869 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5871 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5872 OPRNDS are of size 1. */
5873 next_stmt = first_stmt;
5874 for (i = 0; i < group_size; i++)
5876 /* Since gaps are not supported for interleaved stores,
5877 GROUP_SIZE is the exact number of stmts in the chain.
5878 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5879 there is no interleaving, GROUP_SIZE is 1, and only one
5880 iteration of the loop will be executed. */
5881 gcc_assert (next_stmt
5882 && gimple_assign_single_p (next_stmt));
5883 op = gimple_assign_rhs1 (next_stmt);
5885 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5886 dr_chain.quick_push (vec_oprnd);
5887 oprnds.quick_push (vec_oprnd);
5888 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5892 /* We should have caught mismatched types earlier. */
5893 gcc_assert (useless_type_conversion_p (vectype,
5894 TREE_TYPE (vec_oprnd)));
5895 bool simd_lane_access_p
5896 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5897 if (simd_lane_access_p
5898 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5899 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5900 && integer_zerop (DR_OFFSET (first_dr))
5901 && integer_zerop (DR_INIT (first_dr))
5902 && alias_sets_conflict_p (get_alias_set (aggr_type),
5903 get_alias_set (DR_REF (first_dr))))
5905 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5906 dataref_offset = build_int_cst (reference_alias_ptr_type
5907 (DR_REF (first_dr)), 0);
5908 inv_p = false;
5910 else
5911 dataref_ptr
5912 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5913 simd_lane_access_p ? loop : NULL,
5914 offset, &dummy, gsi, &ptr_incr,
5915 simd_lane_access_p, &inv_p);
5916 gcc_assert (bb_vinfo || !inv_p);
5918 else
5920 /* For interleaved stores we created vectorized defs for all the
5921 defs stored in OPRNDS in the previous iteration (previous copy).
5922 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5923 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5924 next copy.
5925 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5926 OPRNDS are of size 1. */
5927 for (i = 0; i < group_size; i++)
5929 op = oprnds[i];
5930 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5931 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5932 dr_chain[i] = vec_oprnd;
5933 oprnds[i] = vec_oprnd;
5935 if (dataref_offset)
5936 dataref_offset
5937 = int_const_binop (PLUS_EXPR, dataref_offset,
5938 TYPE_SIZE_UNIT (aggr_type));
5939 else
5940 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5941 TYPE_SIZE_UNIT (aggr_type));
5944 if (store_lanes_p)
5946 tree vec_array;
5948 /* Combine all the vectors into an array. */
5949 vec_array = create_vector_array (vectype, vec_num);
5950 for (i = 0; i < vec_num; i++)
5952 vec_oprnd = dr_chain[i];
5953 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5956 /* Emit:
5957 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5958 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5959 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5960 gimple_call_set_lhs (new_stmt, data_ref);
5961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5963 else
5965 new_stmt = NULL;
5966 if (grouped_store)
5968 if (j == 0)
5969 result_chain.create (group_size);
5970 /* Permute. */
5971 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5972 &result_chain);
5975 next_stmt = first_stmt;
5976 for (i = 0; i < vec_num; i++)
5978 unsigned align, misalign;
5980 if (i > 0)
5981 /* Bump the vector pointer. */
5982 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5983 stmt, NULL_TREE);
5985 if (slp)
5986 vec_oprnd = vec_oprnds[i];
5987 else if (grouped_store)
5988 /* For grouped stores vectorized defs are interleaved in
5989 vect_permute_store_chain(). */
5990 vec_oprnd = result_chain[i];
5992 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5993 dataref_ptr,
5994 dataref_offset
5995 ? dataref_offset
5996 : build_int_cst (reference_alias_ptr_type
5997 (DR_REF (first_dr)), 0));
5998 align = TYPE_ALIGN_UNIT (vectype);
5999 if (aligned_access_p (first_dr))
6000 misalign = 0;
6001 else if (DR_MISALIGNMENT (first_dr) == -1)
6003 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6004 align = TYPE_ALIGN_UNIT (elem_type);
6005 else
6006 align = get_object_alignment (DR_REF (first_dr))
6007 / BITS_PER_UNIT;
6008 misalign = 0;
6009 TREE_TYPE (data_ref)
6010 = build_aligned_type (TREE_TYPE (data_ref),
6011 align * BITS_PER_UNIT);
6013 else
6015 TREE_TYPE (data_ref)
6016 = build_aligned_type (TREE_TYPE (data_ref),
6017 TYPE_ALIGN (elem_type));
6018 misalign = DR_MISALIGNMENT (first_dr);
6020 if (dataref_offset == NULL_TREE
6021 && TREE_CODE (dataref_ptr) == SSA_NAME)
6022 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6023 misalign);
6025 if (negative
6026 && dt != vect_constant_def
6027 && dt != vect_external_def)
6029 tree perm_mask = perm_mask_for_reverse (vectype);
6030 tree perm_dest
6031 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6032 vectype);
6033 tree new_temp = make_ssa_name (perm_dest);
6035 /* Generate the permute statement. */
6036 gimple *perm_stmt
6037 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6038 vec_oprnd, perm_mask);
6039 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6041 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6042 vec_oprnd = new_temp;
6045 /* Arguments are ready. Create the new vector stmt. */
6046 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6047 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6049 if (slp)
6050 continue;
6052 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6053 if (!next_stmt)
6054 break;
6057 if (!slp)
6059 if (j == 0)
6060 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6061 else
6062 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6063 prev_stmt_info = vinfo_for_stmt (new_stmt);
6067 dr_chain.release ();
6068 oprnds.release ();
6069 result_chain.release ();
6070 vec_oprnds.release ();
6072 return true;
6075 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6076 VECTOR_CST mask. No checks are made that the target platform supports the
6077 mask, so callers may wish to test can_vec_perm_p separately, or use
6078 vect_gen_perm_mask_checked. */
6080 tree
6081 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6083 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6084 int i, nunits;
6086 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6088 mask_elt_type = lang_hooks.types.type_for_mode
6089 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6090 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6092 mask_elts = XALLOCAVEC (tree, nunits);
6093 for (i = nunits - 1; i >= 0; i--)
6094 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6095 mask_vec = build_vector (mask_type, mask_elts);
6097 return mask_vec;
6100 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6101 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6103 tree
6104 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6106 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6107 return vect_gen_perm_mask_any (vectype, sel);
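/* Purely as an illustration of the two helpers above (this snippet is not
   part of any caller in this file): selecting the even elements of two
   V4SI vectors could be set up as

     unsigned char sel[4] = { 0, 2, 4, 6 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   which returns the VECTOR_CST {0, 2, 4, 6} in the integer mask type
   matching VECTYPE, ready to be used as the third operand of a
   VEC_PERM_EXPR.  */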
6110 /* Given vector variables X and Y that were generated for the scalar
6111 STMT, generate instructions to permute the vector elements of X and Y
6112 using permutation mask MASK_VEC, insert them at *GSI and return the
6113 permuted vector variable. */
6115 static tree
6116 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6117 gimple_stmt_iterator *gsi)
6119 tree vectype = TREE_TYPE (x);
6120 tree perm_dest, data_ref;
6121 gimple *perm_stmt;
6123 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6124 data_ref = make_ssa_name (perm_dest);
6126 /* Generate the permute statement. */
6127 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6128 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6130 return data_ref;
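/* A hypothetical use of the helper above, mirroring the negative-step
   handling later in this file: to reverse a V4SI value X generated for STMT
   one could write

     tree mask = perm_mask_for_reverse (vectype);   mask is {3, 2, 1, 0}
     tree rev = permute_vec_elements (x, x, mask, stmt, gsi);

   after which REV holds the elements of X in reverse order.  */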
6133 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6134 inserting them on the loop's preheader edge. Returns true if we
6135 were successful in doing so (and thus STMT itself can then be moved),
6136 otherwise returns false. */
6138 static bool
6139 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6141 ssa_op_iter i;
6142 tree op;
6143 bool any = false;
6145 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6147 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6148 if (!gimple_nop_p (def_stmt)
6149 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6151 /* Make sure we don't need to recurse. While we could do
6152 so in simple cases, when there are more complex use webs
6153 we don't have an easy way to preserve stmt order to fulfil
6154 dependencies within them. */
6155 tree op2;
6156 ssa_op_iter i2;
6157 if (gimple_code (def_stmt) == GIMPLE_PHI)
6158 return false;
6159 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6161 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6162 if (!gimple_nop_p (def_stmt2)
6163 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6164 return false;
6166 any = true;
6170 if (!any)
6171 return true;
6173 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6175 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6176 if (!gimple_nop_p (def_stmt)
6177 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6179 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6180 gsi_remove (&gsi, false);
6181 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6185 return true;
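/* A made-up GIMPLE example of what this achieves: for a loop-invariant load
     x_1 = MEM[base_2];
   where base_2 is defined inside the loop as
     base_2 = &a + off_3;
   and off_3 is defined outside the loop, the definition of base_2 is moved
   to the loop preheader, so that the caller can then hoist the load itself.
   If base_2 were a PHI, or one of its own operands were defined inside the
   loop, we give up and return false.  */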
6188 /* vectorizable_load.
6190 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6191 can be vectorized.
6192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6193 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6196 static bool
6197 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6198 slp_tree slp_node, slp_instance slp_node_instance)
6200 tree scalar_dest;
6201 tree vec_dest = NULL;
6202 tree data_ref = NULL;
6203 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6204 stmt_vec_info prev_stmt_info;
6205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6206 struct loop *loop = NULL;
6207 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6208 bool nested_in_vect_loop = false;
6209 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6210 tree elem_type;
6211 tree new_temp;
6212 machine_mode mode;
6213 gimple *new_stmt = NULL;
6214 tree dummy;
6215 enum dr_alignment_support alignment_support_scheme;
6216 tree dataref_ptr = NULL_TREE;
6217 tree dataref_offset = NULL_TREE;
6218 gimple *ptr_incr = NULL;
6219 int ncopies;
6220 int i, j, group_size = -1, group_gap_adj;
6221 tree msq = NULL_TREE, lsq;
6222 tree offset = NULL_TREE;
6223 tree byte_offset = NULL_TREE;
6224 tree realignment_token = NULL_TREE;
6225 gphi *phi = NULL;
6226 vec<tree> dr_chain = vNULL;
6227 bool grouped_load = false;
6228 bool load_lanes_p = false;
6229 gimple *first_stmt;
6230 gimple *first_stmt_for_drptr = NULL;
6231 bool inv_p;
6232 bool negative = false;
6233 bool compute_in_loop = false;
6234 struct loop *at_loop;
6235 int vec_num;
6236 bool slp = (slp_node != NULL);
6237 bool slp_perm = false;
6238 enum tree_code code;
6239 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6240 int vf;
6241 tree aggr_type;
6242 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6243 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6244 int gather_scale = 1;
6245 enum vect_def_type gather_dt = vect_unknown_def_type;
6246 vec_info *vinfo = stmt_info->vinfo;
6248 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6249 return false;
6251 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6252 && ! vec_stmt)
6253 return false;
6255 /* Is vectorizable load? */
6256 if (!is_gimple_assign (stmt))
6257 return false;
6259 scalar_dest = gimple_assign_lhs (stmt);
6260 if (TREE_CODE (scalar_dest) != SSA_NAME)
6261 return false;
6263 code = gimple_assign_rhs_code (stmt);
6264 if (code != ARRAY_REF
6265 && code != BIT_FIELD_REF
6266 && code != INDIRECT_REF
6267 && code != COMPONENT_REF
6268 && code != IMAGPART_EXPR
6269 && code != REALPART_EXPR
6270 && code != MEM_REF
6271 && TREE_CODE_CLASS (code) != tcc_declaration)
6272 return false;
6274 if (!STMT_VINFO_DATA_REF (stmt_info))
6275 return false;
6277 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6278 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6280 if (loop_vinfo)
6282 loop = LOOP_VINFO_LOOP (loop_vinfo);
6283 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6284 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6286 else
6287 vf = 1;
6289 /* Multiple types in SLP are handled by creating the appropriate number of
6290 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6291 case of SLP. */
6292 if (slp || PURE_SLP_STMT (stmt_info))
6293 ncopies = 1;
6294 else
6295 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6297 gcc_assert (ncopies >= 1);
6299 /* FORNOW. This restriction should be relaxed. */
6300 if (nested_in_vect_loop && ncopies > 1)
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6304 "multiple types in nested loop.\n");
6305 return false;
6308 /* Invalidate assumptions made by dependence analysis when vectorization
6309 on the unrolled body effectively re-orders stmts. */
6310 if (ncopies > 1
6311 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6312 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6313 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6315 if (dump_enabled_p ())
6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6317 "cannot perform implicit CSE when unrolling "
6318 "with negative dependence distance\n");
6319 return false;
6322 elem_type = TREE_TYPE (vectype);
6323 mode = TYPE_MODE (vectype);
6325 /* FORNOW. In some cases can vectorize even if data-type not supported
6326 (e.g. - data copies). */
6327 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6331 "Aligned load, but unsupported type.\n");
6332 return false;
6335 /* Check if the load is a part of an interleaving chain. */
6336 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6338 grouped_load = true;
6339 /* FORNOW */
6340 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6342 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6344 /* If this is single-element interleaving with an element distance
6345 that leaves unused vector loads around, punt - we would at least create
6346 very sub-optimal code in that case (and blow up memory,
6347 see PR65518). */
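/* For illustration (numbers made up): with V4SI vectors and

     for (i = 0; i < n; i++)
       ... = a[8*i];

   the single load forms a group of size 8, so only one lane of each vector
   load would ever be used and every other vector's worth of elements would
   be loaded for nothing.  */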
6348 bool force_peeling = false;
6349 if (first_stmt == stmt
6350 && !GROUP_NEXT_ELEMENT (stmt_info))
6352 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6356 "single-element interleaving not supported "
6357 "for not adjacent vector loads\n");
6358 return false;
6361 /* Single-element interleaving requires peeling for gaps. */
6362 force_peeling = true;
6365 /* If there is a gap at the end of the group or the group size cannot
6366 be made a multiple of the vector element count then we access excess
6367 elements in the last iteration and thus need to peel that off. */
6368 if (loop_vinfo
6369 && ! STMT_VINFO_STRIDED_P (stmt_info)
6370 && (force_peeling
6371 || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6372 || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
6374 if (dump_enabled_p ())
6375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6376 "Data access with gaps requires scalar "
6377 "epilogue loop\n");
6378 if (loop->inner)
6380 if (dump_enabled_p ())
6381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6382 "Peeling for outer loop is not supported\n");
6383 return false;
6386 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6389 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6390 slp_perm = true;
6392 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6393 if (!slp
6394 && !PURE_SLP_STMT (stmt_info)
6395 && !STMT_VINFO_STRIDED_P (stmt_info))
6397 if (vect_load_lanes_supported (vectype, group_size))
6398 load_lanes_p = true;
6399 else if (!vect_grouped_load_supported (vectype, group_size))
6400 return false;
6403 /* Invalidate assumptions made by dependence analysis when vectorization
6404 on the unrolled body effectively re-orders stmts. */
6405 if (!PURE_SLP_STMT (stmt_info)
6406 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6407 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6408 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6410 if (dump_enabled_p ())
6411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6412 "cannot perform implicit CSE when performing "
6413 "group loads with negative dependence distance\n");
6414 return false;
6417 /* Similarly when the stmt is a load that is both part of a SLP
6418 instance and a loop vectorized stmt via the same-dr mechanism
6419 we have to give up. */
6420 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6421 && (STMT_SLP_TYPE (stmt_info)
6422 != STMT_SLP_TYPE (vinfo_for_stmt
6423 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6427 "conflicting SLP types for CSEd load\n");
6428 return false;
6433 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6435 gimple *def_stmt;
6436 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6437 &gather_off, &gather_scale);
6438 gcc_assert (gather_decl);
6439 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6440 &gather_off_vectype))
6442 if (dump_enabled_p ())
6443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6444 "gather index use not simple.\n");
6445 return false;
6448 else if (STMT_VINFO_STRIDED_P (stmt_info))
6450 if ((grouped_load
6451 && (slp || PURE_SLP_STMT (stmt_info)))
6452 && (group_size > nunits
6453 || nunits % group_size != 0))
6455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6456 "unhandled strided group load\n");
6457 return false;
6460 else
6462 negative = tree_int_cst_compare (nested_in_vect_loop
6463 ? STMT_VINFO_DR_STEP (stmt_info)
6464 : DR_STEP (dr),
6465 size_zero_node) < 0;
6466 if (negative && ncopies > 1)
6468 if (dump_enabled_p ())
6469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6470 "multiple types with negative step.\n");
6471 return false;
6474 if (negative)
6476 if (grouped_load)
6478 if (dump_enabled_p ())
6479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6480 "negative step for group load not supported"
6481 "\n");
6482 return false;
6484 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6485 if (alignment_support_scheme != dr_aligned
6486 && alignment_support_scheme != dr_unaligned_supported)
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6490 "negative step but alignment required.\n");
6491 return false;
6493 if (!perm_mask_for_reverse (vectype))
6495 if (dump_enabled_p ())
6496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6497 "negative step and reversing not supported."
6498 "\n");
6499 return false;
6504 if (!vec_stmt) /* transformation not required. */
6506 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6507 /* The SLP costs are calculated during SLP analysis. */
6508 if (!PURE_SLP_STMT (stmt_info))
6509 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6510 NULL, NULL, NULL);
6511 return true;
6514 if (dump_enabled_p ())
6515 dump_printf_loc (MSG_NOTE, vect_location,
6516 "transform load. ncopies = %d\n", ncopies);
6518 /** Transform. **/
6520 ensure_base_align (stmt_info, dr);
6522 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6524 tree vec_oprnd0 = NULL_TREE, op;
6525 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6526 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6527 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6528 edge pe = loop_preheader_edge (loop);
6529 gimple_seq seq;
6530 basic_block new_bb;
6531 enum { NARROW, NONE, WIDEN } modifier;
6532 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6534 if (nunits == gather_off_nunits)
6535 modifier = NONE;
6536 else if (nunits == gather_off_nunits / 2)
6538 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6539 modifier = WIDEN;
6541 for (i = 0; i < gather_off_nunits; ++i)
6542 sel[i] = i | nunits;
6544 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6546 else if (nunits == gather_off_nunits * 2)
6548 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6549 modifier = NARROW;
6551 for (i = 0; i < nunits; ++i)
6552 sel[i] = i < gather_off_nunits
6553 ? i : i + nunits - gather_off_nunits;
6555 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6556 ncopies *= 2;
6558 else
6559 gcc_unreachable ();
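/* Concrete selector values for the two cases above (illustrative only):
   with nunits == 4 and gather_off_nunits == 8 (WIDEN) the selector is
     { 4, 5, 6, 7, 4, 5, 6, 7 }
   i.e. the high half of the offset vector duplicated, applied to the offset
   vector for the odd-numbered copies; with nunits == 8 and
   gather_off_nunits == 4 (NARROW) the selector is
     { 0, 1, 2, 3, 8, 9, 10, 11 }
   which concatenates the low halves of two successive gather results.  */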
6561 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6562 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6563 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6564 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6565 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6566 scaletype = TREE_VALUE (arglist);
6567 gcc_checking_assert (types_compatible_p (srctype, rettype));
6569 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6571 ptr = fold_convert (ptrtype, gather_base);
6572 if (!is_gimple_min_invariant (ptr))
6574 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6575 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6576 gcc_assert (!new_bb);
6579 /* Currently we support only unconditional gather loads,
6580 so mask should be all ones. */
6581 if (TREE_CODE (masktype) == INTEGER_TYPE)
6582 mask = build_int_cst (masktype, -1);
6583 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6585 mask = build_int_cst (TREE_TYPE (masktype), -1);
6586 mask = build_vector_from_val (masktype, mask);
6587 mask = vect_init_vector (stmt, mask, masktype, NULL);
6589 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6591 REAL_VALUE_TYPE r;
6592 long tmp[6];
6593 for (j = 0; j < 6; ++j)
6594 tmp[j] = -1;
6595 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6596 mask = build_real (TREE_TYPE (masktype), r);
6597 mask = build_vector_from_val (masktype, mask);
6598 mask = vect_init_vector (stmt, mask, masktype, NULL);
6600 else
6601 gcc_unreachable ();
6603 scale = build_int_cst (scaletype, gather_scale);
6605 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6606 merge = build_int_cst (TREE_TYPE (rettype), 0);
6607 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6609 REAL_VALUE_TYPE r;
6610 long tmp[6];
6611 for (j = 0; j < 6; ++j)
6612 tmp[j] = 0;
6613 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6614 merge = build_real (TREE_TYPE (rettype), r);
6616 else
6617 gcc_unreachable ();
6618 merge = build_vector_from_val (rettype, merge);
6619 merge = vect_init_vector (stmt, merge, rettype, NULL);
6621 prev_stmt_info = NULL;
6622 for (j = 0; j < ncopies; ++j)
6624 if (modifier == WIDEN && (j & 1))
6625 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6626 perm_mask, stmt, gsi);
6627 else if (j == 0)
6628 op = vec_oprnd0
6629 = vect_get_vec_def_for_operand (gather_off, stmt);
6630 else
6631 op = vec_oprnd0
6632 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6634 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6636 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6637 == TYPE_VECTOR_SUBPARTS (idxtype));
6638 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6639 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6640 new_stmt
6641 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6642 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6643 op = var;
6646 new_stmt
6647 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6649 if (!useless_type_conversion_p (vectype, rettype))
6651 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6652 == TYPE_VECTOR_SUBPARTS (rettype));
6653 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6654 gimple_call_set_lhs (new_stmt, op);
6655 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6656 var = make_ssa_name (vec_dest);
6657 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6658 new_stmt
6659 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6661 else
6663 var = make_ssa_name (vec_dest, new_stmt);
6664 gimple_call_set_lhs (new_stmt, var);
6667 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6669 if (modifier == NARROW)
6671 if ((j & 1) == 0)
6673 prev_res = var;
6674 continue;
6676 var = permute_vec_elements (prev_res, var,
6677 perm_mask, stmt, gsi);
6678 new_stmt = SSA_NAME_DEF_STMT (var);
6681 if (prev_stmt_info == NULL)
6682 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6683 else
6684 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6685 prev_stmt_info = vinfo_for_stmt (new_stmt);
6687 return true;
6689 else if (STMT_VINFO_STRIDED_P (stmt_info))
6691 gimple_stmt_iterator incr_gsi;
6692 bool insert_after;
6693 gimple *incr;
6694 tree offvar;
6695 tree ivstep;
6696 tree running_off;
6697 vec<constructor_elt, va_gc> *v = NULL;
6698 gimple_seq stmts = NULL;
6699 tree stride_base, stride_step, alias_off;
6701 gcc_assert (!nested_in_vect_loop);
6703 if (slp && grouped_load)
6704 first_dr = STMT_VINFO_DATA_REF
6705 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6706 else
6707 first_dr = dr;
6709 stride_base
6710 = fold_build_pointer_plus
6711 (DR_BASE_ADDRESS (first_dr),
6712 size_binop (PLUS_EXPR,
6713 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6714 convert_to_ptrofftype (DR_INIT (first_dr))));
6715 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6717 /* For a load with loop-invariant (but other than power-of-2)
6718 stride (i.e. not a grouped access) like so:
6720 for (i = 0; i < n; i += stride)
6721 ... = array[i];
6723 we generate a new induction variable and new accesses to
6724 form a new vector (or vectors, depending on ncopies):
6726 for (j = 0; ; j += VF*stride)
6727 tmp1 = array[j];
6728 tmp2 = array[j + stride];
6730 vectemp = {tmp1, tmp2, ...}
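/* A concrete (made-up) instance of the transform above: with a V4SI vectype,
   VF == 4 and stride == 3, each vector copy is assembled from four scalar
   loads and a CONSTRUCTOR:

     for (j = 0; ; j += 4*3)
       tmp1 = array[j];
       tmp2 = array[j + 3];
       tmp3 = array[j + 6];
       tmp4 = array[j + 9];
       vectemp = {tmp1, tmp2, tmp3, tmp4};  */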
6733 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6734 build_int_cst (TREE_TYPE (stride_step), vf));
6736 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6738 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6739 loop, &incr_gsi, insert_after,
6740 &offvar, NULL);
6741 incr = gsi_stmt (incr_gsi);
6742 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6744 stride_step = force_gimple_operand (unshare_expr (stride_step),
6745 &stmts, true, NULL_TREE);
6746 if (stmts)
6747 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6749 prev_stmt_info = NULL;
6750 running_off = offvar;
6751 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6752 int nloads = nunits;
6753 tree ltype = TREE_TYPE (vectype);
6754 auto_vec<tree> dr_chain;
6755 if (slp)
6757 nloads = nunits / group_size;
6758 if (group_size < nunits)
6759 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6760 else
6761 ltype = vectype;
6762 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6763 /* For SLP permutation support we need to load the whole group,
6764 not only the number of vector stmts the permutation result
6765 fits in. */
6766 if (slp_perm)
6768 ncopies = (group_size * vf + nunits - 1) / nunits;
6769 dr_chain.create (ncopies);
6771 else
6772 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6774 for (j = 0; j < ncopies; j++)
6776 tree vec_inv;
6778 if (nloads > 1)
6780 vec_alloc (v, nloads);
6781 for (i = 0; i < nloads; i++)
6783 tree newref, newoff;
6784 gimple *incr;
6785 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6787 newref = force_gimple_operand_gsi (gsi, newref, true,
6788 NULL_TREE, true,
6789 GSI_SAME_STMT);
6790 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6791 newoff = copy_ssa_name (running_off);
6792 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6793 running_off, stride_step);
6794 vect_finish_stmt_generation (stmt, incr, gsi);
6796 running_off = newoff;
6799 vec_inv = build_constructor (vectype, v);
6800 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6801 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6803 else
6805 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6806 build2 (MEM_REF, ltype,
6807 running_off, alias_off));
6808 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6810 tree newoff = copy_ssa_name (running_off);
6811 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6812 running_off, stride_step);
6813 vect_finish_stmt_generation (stmt, incr, gsi);
6815 running_off = newoff;
6818 if (slp)
6820 if (slp_perm)
6821 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6822 else
6823 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6825 else
6827 if (j == 0)
6828 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6829 else
6830 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6831 prev_stmt_info = vinfo_for_stmt (new_stmt);
6834 if (slp_perm)
6835 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6836 slp_node_instance, false);
6837 return true;
6840 if (grouped_load)
6842 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6843 /* For SLP vectorization we directly vectorize a subchain
6844 without permutation. */
6845 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6846 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6847 /* For BB vectorization always use the first stmt to base
6848 the data ref pointer on. */
6849 if (bb_vinfo)
6850 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6852 /* Check if the chain of loads is already vectorized. */
6853 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6854 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6855 ??? But we can only do so if there is exactly one
6856 as we have no way to get at the rest. Leave the CSE
6857 opportunity alone.
6858 ??? With the group load eventually participating
6859 in multiple different permutations (having multiple
6860 slp nodes which refer to the same group) the CSE
6861 is even wrong code. See PR56270. */
6862 && !slp)
6864 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6865 return true;
6867 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6868 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6869 group_gap_adj = 0;
6871 /* VEC_NUM is the number of vect stmts to be created for this group. */
6872 if (slp)
6874 grouped_load = false;
6875 /* For SLP permutation support we need to load the whole group,
6876 not only the number of vector stmts the permutation result
6877 fits in. */
6878 if (slp_perm)
6879 vec_num = (group_size * vf + nunits - 1) / nunits;
6880 else
6881 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6882 group_gap_adj = vf * group_size - nunits * vec_num;
6884 else
6885 vec_num = group_size;
6887 else
6889 first_stmt = stmt;
6890 first_dr = dr;
6891 group_size = vec_num = 1;
6892 group_gap_adj = 0;
6895 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6896 gcc_assert (alignment_support_scheme);
6897 /* Targets with load-lane instructions must not require explicit
6898 realignment. */
6899 gcc_assert (!load_lanes_p
6900 || alignment_support_scheme == dr_aligned
6901 || alignment_support_scheme == dr_unaligned_supported);
6903 /* In case the vectorization factor (VF) is bigger than the number
6904 of elements that we can fit in a vectype (nunits), we have to generate
6905 more than one vector stmt - i.e. - we need to "unroll" the
6906 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6907 from one copy of the vector stmt to the next, in the field
6908 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6909 stages to find the correct vector defs to be used when vectorizing
6910 stmts that use the defs of the current stmt. The example below
6911 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6912 need to create 4 vectorized stmts):
6914 before vectorization:
6915 RELATED_STMT VEC_STMT
6916 S1: x = memref - -
6917 S2: z = x + 1 - -
6919 step 1: vectorize stmt S1:
6920 We first create the vector stmt VS1_0, and, as usual, record a
6921 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6922 Next, we create the vector stmt VS1_1, and record a pointer to
6923 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6924 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6925 stmts and pointers:
6926 RELATED_STMT VEC_STMT
6927 VS1_0: vx0 = memref0 VS1_1 -
6928 VS1_1: vx1 = memref1 VS1_2 -
6929 VS1_2: vx2 = memref2 VS1_3 -
6930 VS1_3: vx3 = memref3 - -
6931 S1: x = load - VS1_0
6932 S2: z = x + 1 - -
6934 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6935 information we recorded in RELATED_STMT field is used to vectorize
6936 stmt S2. */
6938 /* In case of interleaving (non-unit grouped access):
6940 S1: x2 = &base + 2
6941 S2: x0 = &base
6942 S3: x1 = &base + 1
6943 S4: x3 = &base + 3
6945 Vectorized loads are created in the order of memory accesses
6946 starting from the access of the first stmt of the chain:
6948 VS1: vx0 = &base
6949 VS2: vx1 = &base + vec_size*1
6950 VS3: vx3 = &base + vec_size*2
6951 VS4: vx4 = &base + vec_size*3
6953 Then permutation statements are generated:
6955 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6956 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6959 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6960 (the order of the data-refs in the output of vect_permute_load_chain
6961 corresponds to the order of scalar stmts in the interleaving chain - see
6962 the documentation of vect_permute_load_chain()).
6963 The generation of permutation stmts and recording them in
6964 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6966 In case of both multiple types and interleaving, the vector loads and
6967 permutation stmts above are created for every copy. The result vector
6968 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6969 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
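/* A small, hypothetical instance of the above for a group of two V4SI
   loads: the two vector loads read the interleaved data in memory order,

     vx0 = {a0, b0, a1, b1}
     vx1 = {a2, b2, a3, b3}

   and vect_permute_load_chain separates the two scalar streams with

     VS5: vx5 = VEC_PERM_EXPR <vx0, vx1, {0, 2, 4, 6}>   i.e. {a0,a1,a2,a3}
     VS6: vx6 = VEC_PERM_EXPR <vx0, vx1, {1, 3, 5, 7}>   i.e. {b0,b1,b2,b3}

   which become the vectorized defs of the two scalar loads in the chain.  */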
6971 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6972 on a target that supports unaligned accesses (dr_unaligned_supported)
6973 we generate the following code:
6974 p = initial_addr;
6975 indx = 0;
6976 loop {
6977 p = p + indx * vectype_size;
6978 vec_dest = *(p);
6979 indx = indx + 1;
6982 Otherwise, the data reference is potentially unaligned on a target that
6983 does not support unaligned accesses (dr_explicit_realign_optimized) -
6984 then generate the following code, in which the data in each iteration is
6985 obtained by two vector loads, one from the previous iteration, and one
6986 from the current iteration:
6987 p1 = initial_addr;
6988 msq_init = *(floor(p1))
6989 p2 = initial_addr + VS - 1;
6990 realignment_token = call target_builtin;
6991 indx = 0;
6992 loop {
6993 p2 = p2 + indx * vectype_size
6994 lsq = *(floor(p2))
6995 vec_dest = realign_load (msq, lsq, realignment_token)
6996 indx = indx + 1;
6997 msq = lsq;
6998 } */
7000 /* If the misalignment remains the same throughout the execution of the
7001 loop, we can create the init_addr and permutation mask at the loop
7002 preheader. Otherwise, it needs to be created inside the loop.
7003 This can only occur when vectorizing memory accesses in the inner-loop
7004 nested within an outer-loop that is being vectorized. */
7006 if (nested_in_vect_loop
7007 && (TREE_INT_CST_LOW (DR_STEP (dr))
7008 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7010 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7011 compute_in_loop = true;
7014 if ((alignment_support_scheme == dr_explicit_realign_optimized
7015 || alignment_support_scheme == dr_explicit_realign)
7016 && !compute_in_loop)
7018 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7019 alignment_support_scheme, NULL_TREE,
7020 &at_loop);
7021 if (alignment_support_scheme == dr_explicit_realign_optimized)
7023 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7024 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7025 size_one_node);
7028 else
7029 at_loop = loop;
7031 if (negative)
7032 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7034 if (load_lanes_p)
7035 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7036 else
7037 aggr_type = vectype;
7039 prev_stmt_info = NULL;
7040 for (j = 0; j < ncopies; j++)
7042 /* 1. Create the vector or array pointer update chain. */
7043 if (j == 0)
7045 bool simd_lane_access_p
7046 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7047 if (simd_lane_access_p
7048 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7049 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7050 && integer_zerop (DR_OFFSET (first_dr))
7051 && integer_zerop (DR_INIT (first_dr))
7052 && alias_sets_conflict_p (get_alias_set (aggr_type),
7053 get_alias_set (DR_REF (first_dr)))
7054 && (alignment_support_scheme == dr_aligned
7055 || alignment_support_scheme == dr_unaligned_supported))
7057 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7058 dataref_offset = build_int_cst (reference_alias_ptr_type
7059 (DR_REF (first_dr)), 0);
7060 inv_p = false;
7062 else if (first_stmt_for_drptr
7063 && first_stmt != first_stmt_for_drptr)
7065 dataref_ptr
7066 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7067 at_loop, offset, &dummy, gsi,
7068 &ptr_incr, simd_lane_access_p,
7069 &inv_p, byte_offset);
7070 /* Adjust the pointer by the difference to first_stmt. */
7071 data_reference_p ptrdr
7072 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7073 tree diff = fold_convert (sizetype,
7074 size_binop (MINUS_EXPR,
7075 DR_INIT (first_dr),
7076 DR_INIT (ptrdr)));
7077 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7078 stmt, diff);
7080 else
7081 dataref_ptr
7082 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7083 offset, &dummy, gsi, &ptr_incr,
7084 simd_lane_access_p, &inv_p,
7085 byte_offset);
7087 else if (dataref_offset)
7088 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7089 TYPE_SIZE_UNIT (aggr_type));
7090 else
7091 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7092 TYPE_SIZE_UNIT (aggr_type));
7094 if (grouped_load || slp_perm)
7095 dr_chain.create (vec_num);
7097 if (load_lanes_p)
7099 tree vec_array;
7101 vec_array = create_vector_array (vectype, vec_num);
7103 /* Emit:
7104 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7105 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7106 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7107 gimple_call_set_lhs (new_stmt, vec_array);
7108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7110 /* Extract each vector into an SSA_NAME. */
7111 for (i = 0; i < vec_num; i++)
7113 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7114 vec_array, i);
7115 dr_chain.quick_push (new_temp);
7118 /* Record the mapping between SSA_NAMEs and statements. */
7119 vect_record_grouped_load_vectors (stmt, dr_chain);
7121 else
7123 for (i = 0; i < vec_num; i++)
7125 if (i > 0)
7126 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7127 stmt, NULL_TREE);
7129 /* 2. Create the vector-load in the loop. */
7130 switch (alignment_support_scheme)
7132 case dr_aligned:
7133 case dr_unaligned_supported:
7135 unsigned int align, misalign;
7137 data_ref
7138 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7139 dataref_offset
7140 ? dataref_offset
7141 : build_int_cst (reference_alias_ptr_type
7142 (DR_REF (first_dr)), 0));
7143 align = TYPE_ALIGN_UNIT (vectype);
7144 if (alignment_support_scheme == dr_aligned)
7146 gcc_assert (aligned_access_p (first_dr));
7147 misalign = 0;
7149 else if (DR_MISALIGNMENT (first_dr) == -1)
7151 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7152 align = TYPE_ALIGN_UNIT (elem_type);
7153 else
7154 align = (get_object_alignment (DR_REF (first_dr))
7155 / BITS_PER_UNIT);
7156 misalign = 0;
7157 TREE_TYPE (data_ref)
7158 = build_aligned_type (TREE_TYPE (data_ref),
7159 align * BITS_PER_UNIT);
7161 else
7163 TREE_TYPE (data_ref)
7164 = build_aligned_type (TREE_TYPE (data_ref),
7165 TYPE_ALIGN (elem_type));
7166 misalign = DR_MISALIGNMENT (first_dr);
7168 if (dataref_offset == NULL_TREE
7169 && TREE_CODE (dataref_ptr) == SSA_NAME)
7170 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7171 align, misalign);
7172 break;
7174 case dr_explicit_realign:
7176 tree ptr, bump;
7178 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7180 if (compute_in_loop)
7181 msq = vect_setup_realignment (first_stmt, gsi,
7182 &realignment_token,
7183 dr_explicit_realign,
7184 dataref_ptr, NULL);
7186 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7187 ptr = copy_ssa_name (dataref_ptr);
7188 else
7189 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7190 new_stmt = gimple_build_assign
7191 (ptr, BIT_AND_EXPR, dataref_ptr,
7192 build_int_cst
7193 (TREE_TYPE (dataref_ptr),
7194 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7195 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7196 data_ref
7197 = build2 (MEM_REF, vectype, ptr,
7198 build_int_cst (reference_alias_ptr_type
7199 (DR_REF (first_dr)), 0));
7200 vec_dest = vect_create_destination_var (scalar_dest,
7201 vectype);
7202 new_stmt = gimple_build_assign (vec_dest, data_ref);
7203 new_temp = make_ssa_name (vec_dest, new_stmt);
7204 gimple_assign_set_lhs (new_stmt, new_temp);
7205 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7206 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7208 msq = new_temp;
7210 bump = size_binop (MULT_EXPR, vs,
7211 TYPE_SIZE_UNIT (elem_type));
7212 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7213 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7214 new_stmt = gimple_build_assign
7215 (NULL_TREE, BIT_AND_EXPR, ptr,
7216 build_int_cst
7217 (TREE_TYPE (ptr),
7218 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7219 ptr = copy_ssa_name (ptr, new_stmt);
7220 gimple_assign_set_lhs (new_stmt, ptr);
7221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7222 data_ref
7223 = build2 (MEM_REF, vectype, ptr,
7224 build_int_cst (reference_alias_ptr_type
7225 (DR_REF (first_dr)), 0));
7226 break;
7228 case dr_explicit_realign_optimized:
7229 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7230 new_temp = copy_ssa_name (dataref_ptr);
7231 else
7232 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7233 new_stmt = gimple_build_assign
7234 (new_temp, BIT_AND_EXPR, dataref_ptr,
7235 build_int_cst
7236 (TREE_TYPE (dataref_ptr),
7237 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7239 data_ref
7240 = build2 (MEM_REF, vectype, new_temp,
7241 build_int_cst (reference_alias_ptr_type
7242 (DR_REF (first_dr)), 0));
7243 break;
7244 default:
7245 gcc_unreachable ();
7247 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7248 new_stmt = gimple_build_assign (vec_dest, data_ref);
7249 new_temp = make_ssa_name (vec_dest, new_stmt);
7250 gimple_assign_set_lhs (new_stmt, new_temp);
7251 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7253 /* 3. Handle explicit realignment if necessary/supported.
7254 Create in loop:
7255 vec_dest = realign_load (msq, lsq, realignment_token) */
7256 if (alignment_support_scheme == dr_explicit_realign_optimized
7257 || alignment_support_scheme == dr_explicit_realign)
7259 lsq = gimple_assign_lhs (new_stmt);
7260 if (!realignment_token)
7261 realignment_token = dataref_ptr;
7262 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7263 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7264 msq, lsq, realignment_token);
7265 new_temp = make_ssa_name (vec_dest, new_stmt);
7266 gimple_assign_set_lhs (new_stmt, new_temp);
7267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7269 if (alignment_support_scheme == dr_explicit_realign_optimized)
7271 gcc_assert (phi);
7272 if (i == vec_num - 1 && j == ncopies - 1)
7273 add_phi_arg (phi, lsq,
7274 loop_latch_edge (containing_loop),
7275 UNKNOWN_LOCATION);
7276 msq = lsq;
7280 /* 4. Handle invariant-load. */
7281 if (inv_p && !bb_vinfo)
7283 gcc_assert (!grouped_load);
7284 /* If we have versioned for aliasing or the loop doesn't
7285 have any data dependencies that would preclude this,
7286 then we are sure this is a loop invariant load and
7287 thus we can insert it on the preheader edge. */
7288 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7289 && !nested_in_vect_loop
7290 && hoist_defs_of_uses (stmt, loop))
7292 if (dump_enabled_p ())
7294 dump_printf_loc (MSG_NOTE, vect_location,
7295 "hoisting out of the vectorized "
7296 "loop: ");
7297 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7299 tree tem = copy_ssa_name (scalar_dest);
7300 gsi_insert_on_edge_immediate
7301 (loop_preheader_edge (loop),
7302 gimple_build_assign (tem,
7303 unshare_expr
7304 (gimple_assign_rhs1 (stmt))));
7305 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7306 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7307 set_vinfo_for_stmt (new_stmt,
7308 new_stmt_vec_info (new_stmt, vinfo));
7310 else
7312 gimple_stmt_iterator gsi2 = *gsi;
7313 gsi_next (&gsi2);
7314 new_temp = vect_init_vector (stmt, scalar_dest,
7315 vectype, &gsi2);
7316 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7320 if (negative)
7322 tree perm_mask = perm_mask_for_reverse (vectype);
7323 new_temp = permute_vec_elements (new_temp, new_temp,
7324 perm_mask, stmt, gsi);
7325 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7328 /* Collect vector loads and later create their permutation in
7329 vect_transform_grouped_load (). */
7330 if (grouped_load || slp_perm)
7331 dr_chain.quick_push (new_temp);
7333 /* Store vector loads in the corresponding SLP_NODE. */
7334 if (slp && !slp_perm)
7335 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7337 /* Bump the vector pointer to account for a gap or for excess
7338 elements loaded for a permuted SLP load. */
7339 if (group_gap_adj != 0)
7341 bool ovf;
7342 tree bump
7343 = wide_int_to_tree (sizetype,
7344 wi::smul (TYPE_SIZE_UNIT (elem_type),
7345 group_gap_adj, &ovf));
7346 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7347 stmt, bump);
7351 if (slp && !slp_perm)
7352 continue;
7354 if (slp_perm)
7356 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7357 slp_node_instance, false))
7359 dr_chain.release ();
7360 return false;
7363 else
7365 if (grouped_load)
7367 if (!load_lanes_p)
7368 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7369 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7371 else
7373 if (j == 0)
7374 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7375 else
7376 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7377 prev_stmt_info = vinfo_for_stmt (new_stmt);
7380 dr_chain.release ();
7383 return true;
7386 /* Function vect_is_simple_cond.
7388 Input:
7389 VINFO - the vectorization info of the loop or basic block being vectorized.
7390 COND - Condition that is checked for simple use.
7392 Output:
7393 *COMP_VECTYPE - the vector type for the comparison.
7395 Returns whether a COND can be vectorized. Checks whether
7396 condition operands are supportable using vect_is_simple_use. */
7398 static bool
7399 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7401 tree lhs, rhs;
7402 enum vect_def_type dt;
7403 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7405 /* Mask case. */
7406 if (TREE_CODE (cond) == SSA_NAME
7407 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7409 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7410 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7411 &dt, comp_vectype)
7412 || !*comp_vectype
7413 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7414 return false;
7415 return true;
7418 if (!COMPARISON_CLASS_P (cond))
7419 return false;
7421 lhs = TREE_OPERAND (cond, 0);
7422 rhs = TREE_OPERAND (cond, 1);
7424 if (TREE_CODE (lhs) == SSA_NAME)
7426 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7427 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7428 return false;
7430 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7431 && TREE_CODE (lhs) != FIXED_CST)
7432 return false;
7434 if (TREE_CODE (rhs) == SSA_NAME)
7436 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7437 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7438 return false;
7440 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7441 && TREE_CODE (rhs) != FIXED_CST)
7442 return false;
7444 *comp_vectype = vectype1 ? vectype1 : vectype2;
7445 return true;
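/* For instance (purely illustrative): for the scalar statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   the condition a_1 < b_2 is simple when a_1 and b_2 are SSA names (or
   constants) with vectorizable defs, and *COMP_VECTYPE is set to their
   vector type; for a mask condition such as

     x_5 = p_6 ? c_3 : d_4;

   where p_6 has a boolean-vector def, *COMP_VECTYPE is that boolean vector
   type.  */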
7448 /* vectorizable_condition.
7450 Check if STMT is a conditional modify expression that can be vectorized.
7451 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7452 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7453 at GSI.
7455 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7456 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7457 the else clause if it is 2).
7459 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7461 bool
7462 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7463 gimple **vec_stmt, tree reduc_def, int reduc_index,
7464 slp_tree slp_node)
7466 tree scalar_dest = NULL_TREE;
7467 tree vec_dest = NULL_TREE;
7468 tree cond_expr, then_clause, else_clause;
7469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7470 tree comp_vectype = NULL_TREE;
7471 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7472 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7473 tree vec_compare, vec_cond_expr;
7474 tree new_temp;
7475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7476 enum vect_def_type dt, dts[4];
7477 int ncopies;
7478 enum tree_code code;
7479 stmt_vec_info prev_stmt_info = NULL;
7480 int i, j;
7481 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7482 vec<tree> vec_oprnds0 = vNULL;
7483 vec<tree> vec_oprnds1 = vNULL;
7484 vec<tree> vec_oprnds2 = vNULL;
7485 vec<tree> vec_oprnds3 = vNULL;
7486 tree vec_cmp_type;
7487 bool masked = false;
7489 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7490 return false;
7492 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7494 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7495 return false;
7497 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7498 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7499 && reduc_def))
7500 return false;
7502 /* FORNOW: not yet supported. */
7503 if (STMT_VINFO_LIVE_P (stmt_info))
7505 if (dump_enabled_p ())
7506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7507 "value used after loop.\n");
7508 return false;
7512 /* Is this a vectorizable conditional operation? */
7513 if (!is_gimple_assign (stmt))
7514 return false;
7516 code = gimple_assign_rhs_code (stmt);
7518 if (code != COND_EXPR)
7519 return false;
7521 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7522 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7524 if (slp_node || PURE_SLP_STMT (stmt_info))
7525 ncopies = 1;
7526 else
7527 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7529 gcc_assert (ncopies >= 1);
7530 if (reduc_index && ncopies > 1)
7531 return false; /* FORNOW */
7533 cond_expr = gimple_assign_rhs1 (stmt);
7534 then_clause = gimple_assign_rhs2 (stmt);
7535 else_clause = gimple_assign_rhs3 (stmt);
7537 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7538 || !comp_vectype)
7539 return false;
7541 gimple *def_stmt;
7542 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt))
7543 return false;
7544 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt))
7545 return false;
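/* If the condition is already a vector boolean (the mask case recognized by
   vect_is_simple_cond), no separate comparison is built below; the mask is
   used directly as the VEC_COND_EXPR selector.  */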
7547 if (VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7549 vec_cmp_type = comp_vectype;
7550 masked = true;
7552 else
7553 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7554 if (vec_cmp_type == NULL_TREE)
7555 return false;
7557 if (!vec_stmt)
7559 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7560 return expand_vec_cond_expr_p (vectype, comp_vectype);
7563 /* Transform. */
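/* Roughly: for the first copy (j == 0) the vector operands are taken from the
   SLP node or created from the scalar operands; for subsequent copies (j > 0)
   they are derived from the previous copy's defs via
   vect_get_vec_def_for_stmt_copy.  */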
7565 if (!slp_node)
7567 vec_oprnds0.create (1);
7568 vec_oprnds1.create (1);
7569 vec_oprnds2.create (1);
7570 vec_oprnds3.create (1);
7573 /* Handle def. */
7574 scalar_dest = gimple_assign_lhs (stmt);
7575 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7577 /* Handle cond expr. */
7578 for (j = 0; j < ncopies; j++)
7580 gassign *new_stmt = NULL;
7581 if (j == 0)
7583 if (slp_node)
7585 auto_vec<tree, 4> ops;
7586 auto_vec<vec<tree>, 4> vec_defs;
7588 if (masked)
7589 ops.safe_push (cond_expr);
7590 else
7592 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7593 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7595 ops.safe_push (then_clause);
7596 ops.safe_push (else_clause);
7597 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7598 vec_oprnds3 = vec_defs.pop ();
7599 vec_oprnds2 = vec_defs.pop ();
7600 if (!masked)
7601 vec_oprnds1 = vec_defs.pop ();
7602 vec_oprnds0 = vec_defs.pop ();
7604 ops.release ();
7605 vec_defs.release ();
7607 else
7609 gimple *gtemp;
7610 if (masked)
7612 vec_cond_lhs
7613 = vect_get_vec_def_for_operand (cond_expr, stmt,
7614 comp_vectype);
7615 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7616 &gtemp, &dts[0]);
7618 else
7620 vec_cond_lhs =
7621 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7622 stmt, comp_vectype);
7623 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7624 loop_vinfo, &gtemp, &dts[0]);
7626 vec_cond_rhs =
7627 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7628 stmt, comp_vectype);
7629 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7630 loop_vinfo, &gtemp, &dts[1]);
7632 if (reduc_index == 1)
7633 vec_then_clause = reduc_def;
7634 else
7636 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7637 stmt);
7638 vect_is_simple_use (then_clause, loop_vinfo,
7639 &gtemp, &dts[2]);
7641 if (reduc_index == 2)
7642 vec_else_clause = reduc_def;
7643 else
7645 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7646 stmt);
7647 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7651 else
7653 vec_cond_lhs
7654 = vect_get_vec_def_for_stmt_copy (dts[0],
7655 vec_oprnds0.pop ());
7656 if (!masked)
7657 vec_cond_rhs
7658 = vect_get_vec_def_for_stmt_copy (dts[1],
7659 vec_oprnds1.pop ());
7661 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7662 vec_oprnds2.pop ());
7663 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7664 vec_oprnds3.pop ());
7667 if (!slp_node)
7669 vec_oprnds0.quick_push (vec_cond_lhs);
7670 if (!masked)
7671 vec_oprnds1.quick_push (vec_cond_rhs);
7672 vec_oprnds2.quick_push (vec_then_clause);
7673 vec_oprnds3.quick_push (vec_else_clause);
7676 /* Arguments are ready. Create the new vector stmt. */
7677 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7679 vec_then_clause = vec_oprnds2[i];
7680 vec_else_clause = vec_oprnds3[i];
7682 if (masked)
7683 vec_compare = vec_cond_lhs;
7684 else
7686 vec_cond_rhs = vec_oprnds1[i];
7687 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7688 vec_cond_lhs, vec_cond_rhs);
7690 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7691 vec_compare, vec_then_clause, vec_else_clause);
7693 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7694 new_temp = make_ssa_name (vec_dest, new_stmt);
7695 gimple_assign_set_lhs (new_stmt, new_temp);
7696 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7697 if (slp_node)
7698 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7701 if (slp_node)
7702 continue;
7704 if (j == 0)
7705 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7706 else
7707 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7709 prev_stmt_info = vinfo_for_stmt (new_stmt);
7712 vec_oprnds0.release ();
7713 vec_oprnds1.release ();
7714 vec_oprnds2.release ();
7715 vec_oprnds3.release ();
7717 return true;
7720 /* vectorizable_comparison.
7722 Check if STMT is a comparison expression that can be vectorized.
7723 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7724 comparison, put it in VEC_STMT, and insert it at GSI.
7726 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
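/* For instance (illustrative names), a scalar statement "mask_1 = a_2 > b_3"
   whose result maps to a vector boolean (mask) vectype is turned into a
   vector comparison

     vect_mask = vect_a > vect_b;

   producing one mask vector per copy.  */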
7728 bool
7729 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7730 gimple **vec_stmt, tree reduc_def,
7731 slp_tree slp_node)
7733 tree lhs, rhs1, rhs2;
7734 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7735 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7736 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7737 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7738 tree new_temp;
7739 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7740 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7741 unsigned nunits;
7742 int ncopies;
7743 enum tree_code code;
7744 stmt_vec_info prev_stmt_info = NULL;
7745 int i, j;
7746 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7747 vec<tree> vec_oprnds0 = vNULL;
7748 vec<tree> vec_oprnds1 = vNULL;
7749 gimple *def_stmt;
7750 tree mask_type;
7751 tree mask;
7753 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7754 return false;
7756 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
7757 return false;
7759 mask_type = vectype;
7760 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7762 if (slp_node || PURE_SLP_STMT (stmt_info))
7763 ncopies = 1;
7764 else
7765 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7767 gcc_assert (ncopies >= 1);
7768 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7769 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7770 && reduc_def))
7771 return false;
7773 if (STMT_VINFO_LIVE_P (stmt_info))
7775 if (dump_enabled_p ())
7776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7777 "value used after loop.\n");
7778 return false;
7781 if (!is_gimple_assign (stmt))
7782 return false;
7784 code = gimple_assign_rhs_code (stmt);
7786 if (TREE_CODE_CLASS (code) != tcc_comparison)
7787 return false;
7789 rhs1 = gimple_assign_rhs1 (stmt);
7790 rhs2 = gimple_assign_rhs2 (stmt);
7792 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7793 &dts[0], &vectype1))
7794 return false;
7796 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7797 &dts[1], &vectype2))
7798 return false;
7800 if (vectype1 && vectype2
7801 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7802 return false;
7804 vectype = vectype1 ? vectype1 : vectype2;
7806 /* Invariant comparison. */
7807 if (!vectype)
7809 vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
7810 if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
7811 return false;
7813 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7814 return false;
7816 if (!vec_stmt)
7818 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7819 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7820 return expand_vec_cmp_expr_p (vectype, mask_type);
7823 /* Transform. */
7824 if (!slp_node)
7826 vec_oprnds0.create (1);
7827 vec_oprnds1.create (1);
7830 /* Handle def. */
7831 lhs = gimple_assign_lhs (stmt);
7832 mask = vect_create_destination_var (lhs, mask_type);
7834 /* Handle cmp expr. */
7835 for (j = 0; j < ncopies; j++)
7837 gassign *new_stmt = NULL;
7838 if (j == 0)
7840 if (slp_node)
7842 auto_vec<tree, 2> ops;
7843 auto_vec<vec<tree>, 2> vec_defs;
7845 ops.safe_push (rhs1);
7846 ops.safe_push (rhs2);
7847 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7848 vec_oprnds1 = vec_defs.pop ();
7849 vec_oprnds0 = vec_defs.pop ();
7851 else
7853 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7854 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7857 else
7859 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7860 vec_oprnds0.pop ());
7861 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7862 vec_oprnds1.pop ());
7865 if (!slp_node)
7867 vec_oprnds0.quick_push (vec_rhs1);
7868 vec_oprnds1.quick_push (vec_rhs2);
7871 /* Arguments are ready. Create the new vector stmt. */
7872 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7874 vec_rhs2 = vec_oprnds1[i];
7876 new_temp = make_ssa_name (mask);
7877 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7878 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7879 if (slp_node)
7880 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7883 if (slp_node)
7884 continue;
7886 if (j == 0)
7887 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7888 else
7889 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7891 prev_stmt_info = vinfo_for_stmt (new_stmt);
7894 vec_oprnds0.release ();
7895 vec_oprnds1.release ();
7897 return true;
7900 /* Make sure the statement is vectorizable. */
7902 bool
7903 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7905 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7906 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7907 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7908 bool ok;
7909 tree scalar_type, vectype;
7910 gimple *pattern_stmt;
7911 gimple_seq pattern_def_seq;
7913 if (dump_enabled_p ())
7915 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7916 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7919 if (gimple_has_volatile_ops (stmt))
7921 if (dump_enabled_p ())
7922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7923 "not vectorized: stmt has volatile operands\n");
7925 return false;
7928 /* Skip stmts that do not need to be vectorized. In loops this is expected
7929 to include:
7930 - the COND_EXPR which is the loop exit condition
7931 - any LABEL_EXPRs in the loop
7932 - computations that are used only for array indexing or loop control.
7933 In basic blocks we only analyze statements that are a part of some SLP
7934 instance, therefore, all the statements are relevant.
7936 A pattern statement needs to be analyzed instead of the original statement
7937 if the original statement is not relevant. Otherwise, we analyze both
7938 statements. In basic blocks we are called from some SLP instance
7939 traversal; there we don't analyze pattern stmts instead, as the pattern
7940 stmts will already be part of an SLP instance. */
7942 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7943 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7944 && !STMT_VINFO_LIVE_P (stmt_info))
7946 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7947 && pattern_stmt
7948 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7949 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7951 /* Analyze PATTERN_STMT instead of the original stmt. */
7952 stmt = pattern_stmt;
7953 stmt_info = vinfo_for_stmt (pattern_stmt);
7954 if (dump_enabled_p ())
7956 dump_printf_loc (MSG_NOTE, vect_location,
7957 "==> examining pattern statement: ");
7958 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7961 else
7963 if (dump_enabled_p ())
7964 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7966 return true;
7969 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7970 && node == NULL
7971 && pattern_stmt
7972 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7973 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7975 /* Analyze PATTERN_STMT too. */
7976 if (dump_enabled_p ())
7978 dump_printf_loc (MSG_NOTE, vect_location,
7979 "==> examining pattern statement: ");
7980 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7983 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7984 return false;
7987 if (is_pattern_stmt_p (stmt_info)
7988 && node == NULL
7989 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7991 gimple_stmt_iterator si;
7993 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7995 gimple *pattern_def_stmt = gsi_stmt (si);
7996 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7997 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7999 /* Analyze def stmt of STMT if it's a pattern stmt. */
8000 if (dump_enabled_p ())
8002 dump_printf_loc (MSG_NOTE, vect_location,
8003 "==> examining pattern def statement: ");
8004 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8007 if (!vect_analyze_stmt (pattern_def_stmt,
8008 need_to_vectorize, node))
8009 return false;
8014 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8016 case vect_internal_def:
8017 break;
8019 case vect_reduction_def:
8020 case vect_nested_cycle:
8021 gcc_assert (!bb_vinfo
8022 && (relevance == vect_used_in_outer
8023 || relevance == vect_used_in_outer_by_reduction
8024 || relevance == vect_used_by_reduction
8025 || relevance == vect_unused_in_scope));
8026 break;
8028 case vect_induction_def:
8029 case vect_constant_def:
8030 case vect_external_def:
8031 case vect_unknown_def_type:
8032 default:
8033 gcc_unreachable ();
8036 if (bb_vinfo)
8038 gcc_assert (PURE_SLP_STMT (stmt_info));
8040 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8041 if (dump_enabled_p ())
8043 dump_printf_loc (MSG_NOTE, vect_location,
8044 "get vectype for scalar type: ");
8045 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8046 dump_printf (MSG_NOTE, "\n");
8049 vectype = get_vectype_for_scalar_type (scalar_type);
8050 if (!vectype)
8052 if (dump_enabled_p ())
8054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8055 "not SLPed: unsupported data-type ");
8056 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8057 scalar_type);
8058 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8060 return false;
8063 if (dump_enabled_p ())
8065 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8066 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8067 dump_printf (MSG_NOTE, "\n");
8070 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8073 if (STMT_VINFO_RELEVANT_P (stmt_info))
8075 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8076 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8077 || (is_gimple_call (stmt)
8078 && gimple_call_lhs (stmt) == NULL_TREE));
8079 *need_to_vectorize = true;
8082 if (PURE_SLP_STMT (stmt_info) && !node)
8084 dump_printf_loc (MSG_NOTE, vect_location,
8085 "handled only by SLP analysis\n");
8086 return true;
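/* Each vectorizable_* routine below is called with a NULL vec_stmt, so it
   only checks whether the statement can be vectorized and typically records
   the STMT_VINFO_TYPE used later by vect_transform_stmt.  */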
8089 ok = true;
8090 if (!bb_vinfo
8091 && (STMT_VINFO_RELEVANT_P (stmt_info)
8092 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8093 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8094 || vectorizable_conversion (stmt, NULL, NULL, node)
8095 || vectorizable_shift (stmt, NULL, NULL, node)
8096 || vectorizable_operation (stmt, NULL, NULL, node)
8097 || vectorizable_assignment (stmt, NULL, NULL, node)
8098 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8099 || vectorizable_call (stmt, NULL, NULL, node)
8100 || vectorizable_store (stmt, NULL, NULL, node)
8101 || vectorizable_reduction (stmt, NULL, NULL, node)
8102 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8103 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8104 else
8106 if (bb_vinfo)
8107 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8108 || vectorizable_conversion (stmt, NULL, NULL, node)
8109 || vectorizable_shift (stmt, NULL, NULL, node)
8110 || vectorizable_operation (stmt, NULL, NULL, node)
8111 || vectorizable_assignment (stmt, NULL, NULL, node)
8112 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8113 || vectorizable_call (stmt, NULL, NULL, node)
8114 || vectorizable_store (stmt, NULL, NULL, node)
8115 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8116 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8119 if (!ok)
8121 if (dump_enabled_p ())
8123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8124 "not vectorized: relevant stmt not ");
8125 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8126 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8129 return false;
8132 if (bb_vinfo)
8133 return true;
8135 /* Stmts that are (also) "live" (i.e. used outside the loop)
8136 need extra handling, except for vectorizable reductions. */
8137 if (STMT_VINFO_LIVE_P (stmt_info)
8138 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8139 ok = vectorizable_live_operation (stmt, NULL, NULL);
8141 if (!ok)
8143 if (dump_enabled_p ())
8145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8146 "not vectorized: live stmt not ");
8147 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8148 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8151 return false;
8154 return true;
8158 /* Function vect_transform_stmt.
8160 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8162 bool
8163 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8164 bool *grouped_store, slp_tree slp_node,
8165 slp_instance slp_node_instance)
8167 bool is_store = false;
8168 gimple *vec_stmt = NULL;
8169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8170 bool done;
8172 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
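/* Dispatch on the STMT_VINFO_TYPE recorded during analysis; the selected
   vectorizable_* routine emits the vector statements at GSI.  */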
8174 switch (STMT_VINFO_TYPE (stmt_info))
8176 case type_demotion_vec_info_type:
8177 case type_promotion_vec_info_type:
8178 case type_conversion_vec_info_type:
8179 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8180 gcc_assert (done);
8181 break;
8183 case induc_vec_info_type:
8184 gcc_assert (!slp_node);
8185 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8186 gcc_assert (done);
8187 break;
8189 case shift_vec_info_type:
8190 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8191 gcc_assert (done);
8192 break;
8194 case op_vec_info_type:
8195 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8196 gcc_assert (done);
8197 break;
8199 case assignment_vec_info_type:
8200 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8201 gcc_assert (done);
8202 break;
8204 case load_vec_info_type:
8205 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8206 slp_node_instance);
8207 gcc_assert (done);
8208 break;
8210 case store_vec_info_type:
8211 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8212 gcc_assert (done);
8213 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8215 /* In case of interleaving, the whole chain is vectorized when the
8216 last store in the chain is reached. Store stmts before the last
8217 one are skipped, and their vec_stmt_info shouldn't be freed
8218 meanwhile. */
8219 *grouped_store = true;
8220 if (STMT_VINFO_VEC_STMT (stmt_info))
8221 is_store = true;
8223 else
8224 is_store = true;
8225 break;
8227 case condition_vec_info_type:
8228 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8229 gcc_assert (done);
8230 break;
8232 case comparison_vec_info_type:
8233 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8234 gcc_assert (done);
8235 break;
8237 case call_vec_info_type:
8238 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8239 stmt = gsi_stmt (*gsi);
8240 if (is_gimple_call (stmt)
8241 && gimple_call_internal_p (stmt)
8242 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8243 is_store = true;
8244 break;
8246 case call_simd_clone_vec_info_type:
8247 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8248 stmt = gsi_stmt (*gsi);
8249 break;
8251 case reduc_vec_info_type:
8252 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8253 gcc_assert (done);
8254 break;
8256 default:
8257 if (!STMT_VINFO_LIVE_P (stmt_info))
8259 if (dump_enabled_p ())
8260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8261 "stmt not supported.\n");
8262 gcc_unreachable ();
8266 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8267 This would break hybrid SLP vectorization. */
8268 if (slp_node)
8269 gcc_assert (!vec_stmt
8270 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8272 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8273 is being vectorized, but outside the immediately enclosing loop. */
8274 if (vec_stmt
8275 && STMT_VINFO_LOOP_VINFO (stmt_info)
8276 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8277 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8278 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8279 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8280 || STMT_VINFO_RELEVANT (stmt_info) ==
8281 vect_used_in_outer_by_reduction))
8283 struct loop *innerloop = LOOP_VINFO_LOOP (
8284 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8285 imm_use_iterator imm_iter;
8286 use_operand_p use_p;
8287 tree scalar_dest;
8288 gimple *exit_phi;
8290 if (dump_enabled_p ())
8291 dump_printf_loc (MSG_NOTE, vect_location,
8292 "Record the vdef for outer-loop vectorization.\n");
8294 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8295 (to be used when vectorizing outer-loop stmts that use the DEF of
8296 STMT). */
8297 if (gimple_code (stmt) == GIMPLE_PHI)
8298 scalar_dest = PHI_RESULT (stmt);
8299 else
8300 scalar_dest = gimple_assign_lhs (stmt);
8302 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8304 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8306 exit_phi = USE_STMT (use_p);
8307 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8312 /* Handle stmts whose DEF is used outside the loop-nest that is
8313 being vectorized. */
8314 if (STMT_VINFO_LIVE_P (stmt_info)
8315 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8317 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
8318 gcc_assert (done);
8321 if (vec_stmt)
8322 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8324 return is_store;
8328 /* Remove a group of stores (for SLP or interleaving), free their
8329 stmt_vec_info. */
8331 void
8332 vect_remove_stores (gimple *first_stmt)
8334 gimple *next = first_stmt;
8335 gimple *tmp;
8336 gimple_stmt_iterator next_si;
8338 while (next)
8340 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8342 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8343 if (is_pattern_stmt_p (stmt_info))
8344 next = STMT_VINFO_RELATED_STMT (stmt_info);
8345 /* Free the attached stmt_vec_info and remove the stmt. */
8346 next_si = gsi_for_stmt (next);
8347 unlink_stmt_vdef (next);
8348 gsi_remove (&next_si, true);
8349 release_defs (next);
8350 free_stmt_vec_info (next);
8351 next = tmp;
8356 /* Function new_stmt_vec_info.
8358 Create and initialize a new stmt_vec_info struct for STMT. */
8360 stmt_vec_info
8361 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8363 stmt_vec_info res;
8364 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8366 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8367 STMT_VINFO_STMT (res) = stmt;
8368 res->vinfo = vinfo;
8369 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8370 STMT_VINFO_LIVE_P (res) = false;
8371 STMT_VINFO_VECTYPE (res) = NULL;
8372 STMT_VINFO_VEC_STMT (res) = NULL;
8373 STMT_VINFO_VECTORIZABLE (res) = true;
8374 STMT_VINFO_IN_PATTERN_P (res) = false;
8375 STMT_VINFO_RELATED_STMT (res) = NULL;
8376 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8377 STMT_VINFO_DATA_REF (res) = NULL;
8378 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8380 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8381 STMT_VINFO_DR_OFFSET (res) = NULL;
8382 STMT_VINFO_DR_INIT (res) = NULL;
8383 STMT_VINFO_DR_STEP (res) = NULL;
8384 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8386 if (gimple_code (stmt) == GIMPLE_PHI
8387 && is_loop_header_bb_p (gimple_bb (stmt)))
8388 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8389 else
8390 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8392 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8393 STMT_SLP_TYPE (res) = loop_vect;
8394 STMT_VINFO_NUM_SLP_USES (res) = 0;
8396 GROUP_FIRST_ELEMENT (res) = NULL;
8397 GROUP_NEXT_ELEMENT (res) = NULL;
8398 GROUP_SIZE (res) = 0;
8399 GROUP_STORE_COUNT (res) = 0;
8400 GROUP_GAP (res) = 0;
8401 GROUP_SAME_DR_STMT (res) = NULL;
8403 return res;
8407 /* Create a hash table for stmt_vec_info. */
8409 void
8410 init_stmt_vec_info_vec (void)
8412 gcc_assert (!stmt_vec_info_vec.exists ());
8413 stmt_vec_info_vec.create (50);
8417 /* Free hash table for stmt_vec_info. */
8419 void
8420 free_stmt_vec_info_vec (void)
8422 unsigned int i;
8423 stmt_vec_info info;
8424 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8425 if (info != NULL)
8426 free_stmt_vec_info (STMT_VINFO_STMT (info));
8427 gcc_assert (stmt_vec_info_vec.exists ());
8428 stmt_vec_info_vec.release ();
8432 /* Free stmt vectorization related info. */
8434 void
8435 free_stmt_vec_info (gimple *stmt)
8437 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8439 if (!stmt_info)
8440 return;
8442 /* Check if this statement has a related "pattern stmt"
8443 (introduced by the vectorizer during the pattern recognition
8444 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8445 too. */
8446 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8448 stmt_vec_info patt_info
8449 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8450 if (patt_info)
8452 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8453 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8454 gimple_set_bb (patt_stmt, NULL);
8455 tree lhs = gimple_get_lhs (patt_stmt);
8456 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8457 release_ssa_name (lhs);
8458 if (seq)
8460 gimple_stmt_iterator si;
8461 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8463 gimple *seq_stmt = gsi_stmt (si);
8464 gimple_set_bb (seq_stmt, NULL);
8465 lhs = gimple_get_lhs (seq_stmt);
8466 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8467 release_ssa_name (lhs);
8468 free_stmt_vec_info (seq_stmt);
8471 free_stmt_vec_info (patt_stmt);
8475 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8476 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8477 set_vinfo_for_stmt (stmt, NULL);
8478 free (stmt_info);
8482 /* Function get_vectype_for_scalar_type_and_size.
8484 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8485 by the target. */
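/* For example, with SCALAR_TYPE "int" and SIZE 16 this would return a
   16-byte vector of four ints, provided the target supports such a vector
   mode; with SIZE 0 the element count follows the target's preferred SIMD
   mode for the scalar's mode.  */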
8487 static tree
8488 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8490 machine_mode inner_mode = TYPE_MODE (scalar_type);
8491 machine_mode simd_mode;
8492 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8493 int nunits;
8494 tree vectype;
8496 if (nbytes == 0)
8497 return NULL_TREE;
8499 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8500 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8501 return NULL_TREE;
8503 /* For vector types of elements whose mode precision doesn't
8504 match their type's precision we use an element type of mode
8505 precision. The vectorization routines will have to make sure
8506 they support the proper result truncation/extension.
8507 We also make sure to build vector types with INTEGER_TYPE
8508 component type only. */
8509 if (INTEGRAL_TYPE_P (scalar_type)
8510 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8511 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8512 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8513 TYPE_UNSIGNED (scalar_type));
8515 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8516 When the component mode passes the above test simply use a type
8517 corresponding to that mode. The theory is that any use that
8518 would cause problems with this will disable vectorization anyway. */
8519 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8520 && !INTEGRAL_TYPE_P (scalar_type))
8521 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8523 /* We can't build a vector type of elements with alignment bigger than
8524 their size. */
8525 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8526 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8527 TYPE_UNSIGNED (scalar_type));
8529 /* If we fell back to using the mode, fail if there was
8530 no scalar type for it. */
8531 if (scalar_type == NULL_TREE)
8532 return NULL_TREE;
8534 /* If no size was supplied, use the mode the target prefers. Otherwise
8535 look up a vector mode of the specified size. */
8536 if (size == 0)
8537 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8538 else
8539 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8540 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8541 if (nunits <= 1)
8542 return NULL_TREE;
8544 vectype = build_vector_type (scalar_type, nunits);
8546 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8547 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8548 return NULL_TREE;
8550 return vectype;
8553 unsigned int current_vector_size;
8555 /* Function get_vectype_for_scalar_type.
8557 Returns the vector type corresponding to SCALAR_TYPE as supported
8558 by the target. */
8560 tree
8561 get_vectype_for_scalar_type (tree scalar_type)
8563 tree vectype;
8564 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8565 current_vector_size);
8566 if (vectype
8567 && current_vector_size == 0)
8568 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8569 return vectype;
8572 /* Function get_mask_type_for_scalar_type.
8574 Returns the mask type corresponding to a result of comparison
8575 of vectors of the specified SCALAR_TYPE as supported by the target. */
8577 tree
8578 get_mask_type_for_scalar_type (tree scalar_type)
8580 tree vectype = get_vectype_for_scalar_type (scalar_type);
8582 if (!vectype)
8583 return NULL;
8585 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8586 current_vector_size);
8589 /* Function get_same_sized_vectype
8591 Returns a vector type corresponding to SCALAR_TYPE with the same size as
8592 VECTOR_TYPE if supported by the target. */
8594 tree
8595 get_same_sized_vectype (tree scalar_type, tree vector_type)
8597 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8598 return build_same_sized_truth_vector_type (vector_type);
8600 return get_vectype_for_scalar_type_and_size
8601 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8604 /* Function vect_is_simple_use.
8606 Input:
8607 VINFO - the vect info of the loop or basic block that is being vectorized.
8608 OPERAND - operand in the loop or bb.
8609 Output:
8610 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8611 DT - the type of definition
8613 Returns whether a stmt with OPERAND can be vectorized.
8614 For loops, supportable operands are constants, loop invariants, and operands
8615 that are defined by the current iteration of the loop. Unsupportable
8616 operands are those that are defined by a previous iteration of the loop (as
8617 is the case in reduction/induction computations).
8618 For basic blocks, supportable operands are constants and bb invariants.
8619 For now, operands defined outside the basic block are not supported. */
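/* For example, constants classify as vect_constant_def, operands defined
   outside the region being vectorized (including default definitions)
   classify as vect_external_def, and SSA names defined inside the region
   take the def type recorded in their stmt_vec_info.  */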
8621 bool
8622 vect_is_simple_use (tree operand, vec_info *vinfo,
8623 gimple **def_stmt, enum vect_def_type *dt)
8625 *def_stmt = NULL;
8626 *dt = vect_unknown_def_type;
8628 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_NOTE, vect_location,
8631 "vect_is_simple_use: operand ");
8632 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8633 dump_printf (MSG_NOTE, "\n");
8636 if (CONSTANT_CLASS_P (operand))
8638 *dt = vect_constant_def;
8639 return true;
8642 if (is_gimple_min_invariant (operand))
8644 *dt = vect_external_def;
8645 return true;
8648 if (TREE_CODE (operand) != SSA_NAME)
8650 if (dump_enabled_p ())
8651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8652 "not ssa-name.\n");
8653 return false;
8656 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8658 *dt = vect_external_def;
8659 return true;
8662 *def_stmt = SSA_NAME_DEF_STMT (operand);
8663 if (dump_enabled_p ())
8665 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8669 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8670 *dt = vect_external_def;
8671 else
8673 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8674 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8677 if (dump_enabled_p ())
8679 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8680 switch (*dt)
8682 case vect_uninitialized_def:
8683 dump_printf (MSG_NOTE, "uninitialized\n");
8684 break;
8685 case vect_constant_def:
8686 dump_printf (MSG_NOTE, "constant\n");
8687 break;
8688 case vect_external_def:
8689 dump_printf (MSG_NOTE, "external\n");
8690 break;
8691 case vect_internal_def:
8692 dump_printf (MSG_NOTE, "internal\n");
8693 break;
8694 case vect_induction_def:
8695 dump_printf (MSG_NOTE, "induction\n");
8696 break;
8697 case vect_reduction_def:
8698 dump_printf (MSG_NOTE, "reduction\n");
8699 break;
8700 case vect_double_reduction_def:
8701 dump_printf (MSG_NOTE, "double reduction\n");
8702 break;
8703 case vect_nested_cycle:
8704 dump_printf (MSG_NOTE, "nested cycle\n");
8705 break;
8706 case vect_unknown_def_type:
8707 dump_printf (MSG_NOTE, "unknown\n");
8708 break;
8712 if (*dt == vect_unknown_def_type)
8714 if (dump_enabled_p ())
8715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8716 "Unsupported pattern.\n");
8717 return false;
8720 switch (gimple_code (*def_stmt))
8722 case GIMPLE_PHI:
8723 case GIMPLE_ASSIGN:
8724 case GIMPLE_CALL:
8725 break;
8726 default:
8727 if (dump_enabled_p ())
8728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8729 "unsupported defining stmt:\n");
8730 return false;
8733 return true;
8736 /* Function vect_is_simple_use.
8738 Same as vect_is_simple_use but also determines the vector operand
8739 type of OPERAND and stores it to *VECTYPE. If the definition of
8740 OPERAND is vect_uninitialized_def, vect_constant_def or
8741 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8742 is responsible for computing the best suited vector type for the
8743 scalar operand. */
8745 bool
8746 vect_is_simple_use (tree operand, vec_info *vinfo,
8747 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8749 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8750 return false;
8752 /* Now get a vector type if the def is internal, otherwise supply
8753 NULL_TREE and leave it up to the caller to figure out a proper
8754 type for the use stmt. */
8755 if (*dt == vect_internal_def
8756 || *dt == vect_induction_def
8757 || *dt == vect_reduction_def
8758 || *dt == vect_double_reduction_def
8759 || *dt == vect_nested_cycle)
8761 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8763 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8764 && !STMT_VINFO_RELEVANT (stmt_info)
8765 && !STMT_VINFO_LIVE_P (stmt_info))
8766 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8768 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8769 gcc_assert (*vectype != NULL_TREE);
8771 else if (*dt == vect_uninitialized_def
8772 || *dt == vect_constant_def
8773 || *dt == vect_external_def)
8774 *vectype = NULL_TREE;
8775 else
8776 gcc_unreachable ();
8778 return true;
8782 /* Function supportable_widening_operation
8784 Check whether an operation represented by the code CODE is a
8785 widening operation that is supported by the target platform in
8786 vector form (i.e., when operating on arguments of type VECTYPE_IN
8787 producing a result of type VECTYPE_OUT).
8789 Widening operations we currently support are NOP (CONVERT), FLOAT
8790 and WIDEN_MULT. This function checks if these operations are supported
8791 by the target platform either directly (via vector tree-codes), or via
8792 target builtins.
8794 Output:
8795 - CODE1 and CODE2 are codes of vector operations to be used when
8796 vectorizing the operation, if available.
8797 - MULTI_STEP_CVT determines the number of required intermediate steps in
8798 case of multi-step conversion (like char->short->int - in that case
8799 MULTI_STEP_CVT will be 1).
8800 - INTERM_TYPES contains the intermediate type required to perform the
8801 widening operation (short in the above example). */
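/* As an illustration, widening a vector of chars to shorts via CONVERT is
   normally done with the VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR pair,
   producing two wide vectors per input vector (the lo/hi roles are swapped
   on big-endian targets, see below).  */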
8803 bool
8804 supportable_widening_operation (enum tree_code code, gimple *stmt,
8805 tree vectype_out, tree vectype_in,
8806 enum tree_code *code1, enum tree_code *code2,
8807 int *multi_step_cvt,
8808 vec<tree> *interm_types)
8810 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8811 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8812 struct loop *vect_loop = NULL;
8813 machine_mode vec_mode;
8814 enum insn_code icode1, icode2;
8815 optab optab1, optab2;
8816 tree vectype = vectype_in;
8817 tree wide_vectype = vectype_out;
8818 enum tree_code c1, c2;
8819 int i;
8820 tree prev_type, intermediate_type;
8821 machine_mode intermediate_mode, prev_mode;
8822 optab optab3, optab4;
8824 *multi_step_cvt = 0;
8825 if (loop_info)
8826 vect_loop = LOOP_VINFO_LOOP (loop_info);
8828 switch (code)
8830 case WIDEN_MULT_EXPR:
8831 /* The result of a vectorized widening operation usually requires
8832 two vectors (because the widened results do not fit into one vector).
8833 The generated vector results would normally be expected to be
8834 generated in the same order as in the original scalar computation,
8835 i.e. if 8 results are generated in each vector iteration, they are
8836 to be organized as follows:
8837 vect1: [res1,res2,res3,res4],
8838 vect2: [res5,res6,res7,res8].
8840 However, in the special case that the result of the widening
8841 operation is used in a reduction computation only, the order doesn't
8842 matter (because when vectorizing a reduction we change the order of
8843 the computation). Some targets can take advantage of this and
8844 generate more efficient code. For example, targets like Altivec,
8845 that support widen_mult using a sequence of {mult_even,mult_odd}
8846 generate the following vectors:
8847 vect1: [res1,res3,res5,res7],
8848 vect2: [res2,res4,res6,res8].
8850 When vectorizing outer-loops, we execute the inner-loop sequentially
8851 (each vectorized inner-loop iteration contributes to VF outer-loop
8852 iterations in parallel). We therefore don't allow changing the
8853 order of the computation in the inner-loop during outer-loop
8854 vectorization. */
8855 /* TODO: Another case in which order doesn't *really* matter is when we
8856 widen and then contract again, e.g. (short)((int)x * y >> 8).
8857 Normally, pack_trunc performs an even/odd permute, whereas the
8858 repack from an even/odd expansion would be an interleave, which
8859 would be significantly simpler for e.g. AVX2. */
8860 /* In any case, in order to avoid duplicating the code below, recurse
8861 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8862 are properly set up for the caller. If we fail, we'll continue with
8863 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8864 if (vect_loop
8865 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8866 && !nested_in_vect_loop_p (vect_loop, stmt)
8867 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8868 stmt, vectype_out, vectype_in,
8869 code1, code2, multi_step_cvt,
8870 interm_types))
8872 /* Elements in a vector with vect_used_by_reduction property cannot
8873 be reordered if the use chain with this property does not have the
8874 same operation. One such example is s += a * b, where elements
8875 in a and b cannot be reordered. Here we check if the vector defined
8876 by STMT is only directly used in the reduction statement. */
8877 tree lhs = gimple_assign_lhs (stmt);
8878 use_operand_p dummy;
8879 gimple *use_stmt;
8880 stmt_vec_info use_stmt_info = NULL;
8881 if (single_imm_use (lhs, &dummy, &use_stmt)
8882 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8883 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8884 return true;
8886 c1 = VEC_WIDEN_MULT_LO_EXPR;
8887 c2 = VEC_WIDEN_MULT_HI_EXPR;
8888 break;
8890 case DOT_PROD_EXPR:
8891 c1 = DOT_PROD_EXPR;
8892 c2 = DOT_PROD_EXPR;
8893 break;
8895 case SAD_EXPR:
8896 c1 = SAD_EXPR;
8897 c2 = SAD_EXPR;
8898 break;
8900 case VEC_WIDEN_MULT_EVEN_EXPR:
8901 /* Support the recursion induced just above. */
8902 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8903 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8904 break;
8906 case WIDEN_LSHIFT_EXPR:
8907 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8908 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8909 break;
8911 CASE_CONVERT:
8912 c1 = VEC_UNPACK_LO_EXPR;
8913 c2 = VEC_UNPACK_HI_EXPR;
8914 break;
8916 case FLOAT_EXPR:
8917 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8918 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8919 break;
8921 case FIX_TRUNC_EXPR:
8922 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8923 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8924 computing the operation. */
8925 return false;
8927 default:
8928 gcc_unreachable ();
8931 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8932 std::swap (c1, c2);
8934 if (code == FIX_TRUNC_EXPR)
8936 /* The signedness is determined from the output operand. */
8937 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8938 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8940 else
8942 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8943 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8946 if (!optab1 || !optab2)
8947 return false;
8949 vec_mode = TYPE_MODE (vectype);
8950 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8951 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8952 return false;
8954 *code1 = c1;
8955 *code2 = c2;
8957 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8958 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8959 return true;
8961 /* Check if it's a multi-step conversion that can be done using intermediate
8962 types. */
8964 prev_type = vectype;
8965 prev_mode = vec_mode;
8967 if (!CONVERT_EXPR_CODE_P (code))
8968 return false;
8970 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8971 intermediate steps in the promotion sequence. We try
8972 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8973 not. */
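/* E.g. a char -> int promotion is attempted as char -> short -> int,
   recording the short vector type in INTERM_TYPES if both steps are
   supported.  */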
8974 interm_types->create (MAX_INTERM_CVT_STEPS);
8975 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8977 intermediate_mode = insn_data[icode1].operand[0].mode;
8978 intermediate_type
8979 = lang_hooks.types.type_for_mode (intermediate_mode,
8980 TYPE_UNSIGNED (prev_type));
8981 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8982 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8984 if (!optab3 || !optab4
8985 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8986 || insn_data[icode1].operand[0].mode != intermediate_mode
8987 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8988 || insn_data[icode2].operand[0].mode != intermediate_mode
8989 || ((icode1 = optab_handler (optab3, intermediate_mode))
8990 == CODE_FOR_nothing)
8991 || ((icode2 = optab_handler (optab4, intermediate_mode))
8992 == CODE_FOR_nothing))
8993 break;
8995 interm_types->quick_push (intermediate_type);
8996 (*multi_step_cvt)++;
8998 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8999 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9000 return true;
9002 prev_type = intermediate_type;
9003 prev_mode = intermediate_mode;
9006 interm_types->release ();
9007 return false;
9011 /* Function supportable_narrowing_operation
9013 Check whether an operation represented by the code CODE is a
9014 narrowing operation that is supported by the target platform in
9015 vector form (i.e., when operating on arguments of type VECTYPE_IN
9016 and producing a result of type VECTYPE_OUT).
9018 Narrowing operations we currently support are NOP (CONVERT) and
9019 FIX_TRUNC. This function checks if these operations are supported by
9020 the target platform directly via vector tree-codes.
9022 Output:
9023 - CODE1 is the code of a vector operation to be used when
9024 vectorizing the operation, if available.
9025 - MULTI_STEP_CVT determines the number of required intermediate steps in
9026 case of multi-step conversion (like int->short->char - in that case
9027 MULTI_STEP_CVT will be 1).
9028 - INTERM_TYPES contains the intermediate type required to perform the
9029 narrowing operation (short in the above example). */
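/* For instance, narrowing vectors of ints to shorts is done with
   VEC_PACK_TRUNC_EXPR (packing two input vectors into one narrow vector);
   an int -> char narrowing goes through an intermediate short vector type,
   recorded in INTERM_TYPES.  */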
9031 bool
9032 supportable_narrowing_operation (enum tree_code code,
9033 tree vectype_out, tree vectype_in,
9034 enum tree_code *code1, int *multi_step_cvt,
9035 vec<tree> *interm_types)
9037 machine_mode vec_mode;
9038 enum insn_code icode1;
9039 optab optab1, interm_optab;
9040 tree vectype = vectype_in;
9041 tree narrow_vectype = vectype_out;
9042 enum tree_code c1;
9043 tree intermediate_type;
9044 machine_mode intermediate_mode, prev_mode;
9045 int i;
9046 bool uns;
9048 *multi_step_cvt = 0;
9049 switch (code)
9051 CASE_CONVERT:
9052 c1 = VEC_PACK_TRUNC_EXPR;
9053 break;
9055 case FIX_TRUNC_EXPR:
9056 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9057 break;
9059 case FLOAT_EXPR:
9060 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9061 tree code and optabs used for computing the operation. */
9062 return false;
9064 default:
9065 gcc_unreachable ();
9068 if (code == FIX_TRUNC_EXPR)
9069 /* The signedness is determined from the output operand. */
9070 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9071 else
9072 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9074 if (!optab1)
9075 return false;
9077 vec_mode = TYPE_MODE (vectype);
9078 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9079 return false;
9081 *code1 = c1;
9083 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9084 return true;
9086 /* Check if it's a multi-step conversion that can be done using intermediate
9087 types. */
9088 prev_mode = vec_mode;
9089 if (code == FIX_TRUNC_EXPR)
9090 uns = TYPE_UNSIGNED (vectype_out);
9091 else
9092 uns = TYPE_UNSIGNED (vectype);
9094 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9095 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9096 costly than signed. */
9097 if (code == FIX_TRUNC_EXPR && uns)
9099 enum insn_code icode2;
9101 intermediate_type
9102 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9103 interm_optab
9104 = optab_for_tree_code (c1, intermediate_type, optab_default);
9105 if (interm_optab != unknown_optab
9106 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9107 && insn_data[icode1].operand[0].mode
9108 == insn_data[icode2].operand[0].mode)
9110 uns = false;
9111 optab1 = interm_optab;
9112 icode1 = icode2;
9116 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9117 intermediate steps in the narrowing sequence. We try
9118 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9119 interm_types->create (MAX_INTERM_CVT_STEPS);
9120 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9122 intermediate_mode = insn_data[icode1].operand[0].mode;
9123 intermediate_type
9124 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9125 interm_optab
9126 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9127 optab_default);
9128 if (!interm_optab
9129 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9130 || insn_data[icode1].operand[0].mode != intermediate_mode
9131 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9132 == CODE_FOR_nothing))
9133 break;
9135 interm_types->quick_push (intermediate_type);
9136 (*multi_step_cvt)++;
9138 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9139 return true;
9141 prev_mode = intermediate_mode;
9142 optab1 = interm_optab;
9145 interm_types->release ();
9146 return false;