[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "tree-eh.h"
35 #include "gimple-expr.h"
36 #include "is-a.h"
37 #include "gimple.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-ssa.h"
42 #include "tree-cfg.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-manip.h"
48 #include "cfgloop.h"
49 #include "tree-ssa-loop.h"
50 #include "tree-scalar-evolution.h"
51 #include "expr.h"
52 #include "recog.h" /* FIXME: for insn_data */
53 #include "optabs.h"
54 #include "diagnostic-core.h"
55 #include "tree-vectorizer.h"
56 #include "dumpfile.h"
57 #include "cgraph.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (struct _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
75 gimple stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 struct loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if (body_cost_vec)
99 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
100 add_stmt_info_to_vec (body_cost_vec, count, kind,
101 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
102 misalign);
103 return (unsigned)
104 (builtin_vectorization_cost (kind, vectype, misalign) * count);
107 else
109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
111 void *target_cost_data;
113 if (loop_vinfo)
114 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
115 else
116 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
118 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
119 misalign, where);
123 /* Return a variable of type ELEM_TYPE[NELEMS]. */
125 static tree
126 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
128 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
129 "vect_array");
132 /* ARRAY is an array of vectors created by create_vector_array.
133 Return an SSA_NAME for the vector in index N. The reference
134 is part of the vectorization of STMT and the vector is associated
135 with scalar destination SCALAR_DEST. */
137 static tree
138 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
139 tree array, unsigned HOST_WIDE_INT n)
141 tree vect_type, vect, vect_name, array_ref;
142 gimple new_stmt;
144 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
145 vect_type = TREE_TYPE (TREE_TYPE (array));
146 vect = vect_create_destination_var (scalar_dest, vect_type);
147 array_ref = build4 (ARRAY_REF, vect_type, array,
148 build_int_cst (size_type_node, n),
149 NULL_TREE, NULL_TREE);
151 new_stmt = gimple_build_assign (vect, array_ref);
152 vect_name = make_ssa_name (vect, new_stmt);
153 gimple_assign_set_lhs (new_stmt, vect_name);
154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
156 return vect_name;
159 /* ARRAY is an array of vectors created by create_vector_array.
160 Emit code to store SSA_NAME VECT in index N of the array.
161 The store is part of the vectorization of STMT. */
163 static void
164 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
165 tree array, unsigned HOST_WIDE_INT n)
167 tree array_ref;
168 gimple new_stmt;
170 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (array_ref, vect);
175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
178 /* PTR is a pointer to an array of type TYPE. Return a representation
179 of *PTR. The memory reference replaces those in FIRST_DR
180 (and its group). */
182 static tree
183 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
185 tree mem_ref, alias_ptr_type;
187 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
188 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
189 /* Arrays have the same alignment as their type. */
190 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
191 return mem_ref;
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
200 static void
201 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
202 enum vect_relevant relevant, bool live_p,
203 bool used_in_pattern)
205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208 gimple pattern_stmt;
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "mark relevant %d, live %d.\n", relevant, live_p);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 bool found = false;
221 if (!used_in_pattern)
223 imm_use_iterator imm_iter;
224 use_operand_p use_p;
225 gimple use_stmt;
226 tree lhs;
227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
228 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 if (is_gimple_assign (stmt))
231 lhs = gimple_assign_lhs (stmt);
232 else
233 lhs = gimple_call_lhs (stmt);
235 /* This use is outside the pattern. If LHS has other uses that are
236 pattern uses, we should mark the stmt itself, and not the pattern
237 stmt. */
238 if (lhs && TREE_CODE (lhs) == SSA_NAME)
239 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
241 if (is_gimple_debug (USE_STMT (use_p)))
242 continue;
243 use_stmt = USE_STMT (use_p);
245 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
246 continue;
248 if (vinfo_for_stmt (use_stmt)
249 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
251 found = true;
252 break;
257 if (!found)
259 /* This is the last stmt in a sequence that was detected as a
260 pattern that can potentially be vectorized. Don't mark the stmt
261 as relevant/live because it's not going to be vectorized.
262 Instead mark the pattern-stmt that replaces it. */
264 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_NOTE, vect_location,
268 "last stmt in pattern. don't mark"
269 " relevant/live.\n");
270 stmt_info = vinfo_for_stmt (pattern_stmt);
271 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
272 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
273 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
274 stmt = pattern_stmt;
278 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
279 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
280 STMT_VINFO_RELEVANT (stmt_info) = relevant;
282 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
283 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
285 if (dump_enabled_p ())
286 dump_printf_loc (MSG_NOTE, vect_location,
287 "already marked relevant/live.\n");
288 return;
291 worklist->safe_push (stmt);
295 /* Function vect_stmt_relevant_p.
297 Return true if STMT in loop that is represented by LOOP_VINFO is
298 "relevant for vectorization".
300 A stmt is considered "relevant for vectorization" if:
301 - it has uses outside the loop.
302 - it has vdefs (it alters memory).
303 - it is a control stmt in the loop (other than the exit condition).
305 CHECKME: what other side effects would the vectorizer allow? */
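/* For illustration (hypothetical scalar loop, not taken from the source):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;      <-- has a vdef (alters memory): relevant
         sum = sum + b[i];     <-- result used after the loop: live
       }
     ... = sum;

   The store to a[i] is relevant because it changes memory, and the stmt
   updating sum is live because its value is used outside the loop.  */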
307 static bool
308 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
309 enum vect_relevant *relevant, bool *live_p)
311 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
312 ssa_op_iter op_iter;
313 imm_use_iterator imm_iter;
314 use_operand_p use_p;
315 def_operand_p def_p;
317 *relevant = vect_unused_in_scope;
318 *live_p = false;
320 /* cond stmt other than loop exit cond. */
321 if (is_ctrl_stmt (stmt)
322 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
323 != loop_exit_ctrl_vec_info_type)
324 *relevant = vect_used_in_scope;
326 /* changing memory. */
327 if (gimple_code (stmt) != GIMPLE_PHI)
328 if (gimple_vdef (stmt))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: stmt has vdefs.\n");
333 *relevant = vect_used_in_scope;
336 /* uses outside the loop. */
337 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
339 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
341 basic_block bb = gimple_bb (USE_STMT (use_p));
342 if (!flow_bb_inside_loop_p (loop, bb))
344 if (dump_enabled_p ())
345 dump_printf_loc (MSG_NOTE, vect_location,
346 "vec_stmt_relevant_p: used out of loop.\n");
348 if (is_gimple_debug (USE_STMT (use_p)))
349 continue;
351 /* We expect all such uses to be in the loop exit phis
352 (because of loop closed form) */
353 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
354 gcc_assert (bb == single_exit (loop)->dest);
356 *live_p = true;
361 return (*live_p || *relevant);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
373 tree operand;
374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info))
380 return true;
382 /* STMT has a data_ref. FORNOW this means that it is one of
383 the following forms:
384 -1- ARRAY_REF = var
385 -2- var = ARRAY_REF
386 (This should have been verified in analyze_data_refs).
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
390 for array indexing.
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
395 if (!gimple_assign_copy_p (stmt))
397 if (is_gimple_call (stmt)
398 && gimple_call_internal_p (stmt))
399 switch (gimple_call_internal_fn (stmt))
401 case IFN_MASK_STORE:
402 operand = gimple_call_arg (stmt, 3);
403 if (operand == use)
404 return true;
405 /* FALLTHRU */
406 case IFN_MASK_LOAD:
407 operand = gimple_call_arg (stmt, 2);
408 if (operand == use)
409 return true;
410 break;
411 default:
412 break;
414 return false;
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
419 operand = gimple_assign_rhs1 (stmt);
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
423 if (operand == use)
424 return true;
426 return false;
431 /* Function process_use.
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 be performed.
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT because it has already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
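/* For illustration of case 1 (hypothetical stmt): in

     x = a[i]

   the use of 'i' only feeds the address computation of the array
   reference, so the stmt defining 'i' does not itself need to be
   vectorized and its liveness/relevance is left unchanged.  */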
457 static bool
458 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
459 enum vect_relevant relevant, vec<gimple> *worklist,
460 bool force)
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
466 tree def;
467 gimple def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 relevant = vect_used_in_outer_by_reduction;
579 break;
581 case vect_used_in_scope:
582 relevant = vect_used_in_outer;
583 break;
585 default:
586 gcc_unreachable ();
590 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
591 is_pattern_stmt_p (stmt_vinfo));
592 return true;
596 /* Function vect_mark_stmts_to_be_vectorized.
598 Not all stmts in the loop need to be vectorized. For example:
600 for i...
601 for j...
602 1. T0 = i + j
603 2. T1 = a[T0]
605 3. j = j + 1
607 Stmts 1 and 3 do not need to be vectorized, because loop control and
608 addressing of vectorized data-refs are handled differently.
610 This pass detects such stmts. */
612 bool
613 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
616 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
617 unsigned int nbbs = loop->num_nodes;
618 gimple_stmt_iterator si;
619 gimple stmt;
620 unsigned int i;
621 stmt_vec_info stmt_vinfo;
622 basic_block bb;
623 gimple phi;
624 bool live_p;
625 enum vect_relevant relevant, tmp_relevant;
626 enum vect_def_type def_type;
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location,
630 "=== vect_mark_stmts_to_be_vectorized ===\n");
632 auto_vec<gimple, 64> worklist;
634 /* 1. Init worklist. */
635 for (i = 0; i < nbbs; i++)
637 bb = bbs[i];
638 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
640 phi = gsi_stmt (si);
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
645 dump_printf (MSG_NOTE, "\n");
648 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
649 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
651 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
653 stmt = gsi_stmt (si);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
658 dump_printf (MSG_NOTE, "\n");
661 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
662 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
666 /* 2. Process_worklist */
667 while (worklist.length () > 0)
669 use_operand_p use_p;
670 ssa_op_iter iter;
672 stmt = worklist.pop ();
673 if (dump_enabled_p ())
675 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
676 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
677 dump_printf (MSG_NOTE, "\n");
680 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
681 (DEF_STMT) as relevant/irrelevant and live/dead according to the
682 liveness and relevance properties of STMT. */
683 stmt_vinfo = vinfo_for_stmt (stmt);
684 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
685 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
687 /* Generally, the liveness and relevance properties of STMT are
688 propagated as is to the DEF_STMTs of its USEs:
689 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
690 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
692 One exception is when STMT has been identified as defining a reduction
693 variable; in this case we set the liveness/relevance as follows:
694 live_p = false
695 relevant = vect_used_by_reduction
696 This is because we distinguish between two kinds of relevant stmts -
697 those that are used by a reduction computation, and those that are
698 (also) used by a regular computation. This allows us later on to
699 identify stmts that are used solely by a reduction, and therefore the
700 order of the results that they produce does not have to be kept. */
702 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
703 tmp_relevant = relevant;
704 switch (def_type)
706 case vect_reduction_def:
707 switch (tmp_relevant)
709 case vect_unused_in_scope:
710 relevant = vect_used_by_reduction;
711 break;
713 case vect_used_by_reduction:
714 if (gimple_code (stmt) == GIMPLE_PHI)
715 break;
716 /* fall through */
718 default:
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of reduction.\n");
722 return false;
725 live_p = false;
726 break;
728 case vect_nested_cycle:
729 if (tmp_relevant != vect_unused_in_scope
730 && tmp_relevant != vect_used_in_outer_by_reduction
731 && tmp_relevant != vect_used_in_outer)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of nested cycle.\n");
737 return false;
740 live_p = false;
741 break;
743 case vect_double_reduction_def:
744 if (tmp_relevant != vect_unused_in_scope
745 && tmp_relevant != vect_used_by_reduction)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
749 "unsupported use of double reduction.\n");
751 return false;
754 live_p = false;
755 break;
757 default:
758 break;
761 if (is_pattern_stmt_p (stmt_vinfo))
763 /* Pattern statements are not inserted into the code, so
764 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
765 have to scan the RHS or function arguments instead. */
766 if (is_gimple_assign (stmt))
768 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
769 tree op = gimple_assign_rhs1 (stmt);
771 i = 1;
772 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
774 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
775 live_p, relevant, &worklist, false)
776 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
777 live_p, relevant, &worklist, false))
778 return false;
779 i = 2;
781 for (; i < gimple_num_ops (stmt); i++)
783 op = gimple_op (stmt, i);
784 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
785 &worklist, false))
786 return false;
789 else if (is_gimple_call (stmt))
791 for (i = 0; i < gimple_call_num_args (stmt); i++)
793 tree arg = gimple_call_arg (stmt, i);
794 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
795 &worklist, false))
796 return false;
800 else
801 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
803 tree op = USE_FROM_PTR (use_p);
804 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
805 &worklist, false))
806 return false;
809 if (STMT_VINFO_GATHER_P (stmt_vinfo))
811 tree off;
812 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
813 gcc_assert (decl);
814 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
815 &worklist, true))
816 return false;
818 } /* while worklist */
820 return true;
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
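/* Rough cost sketch (hypothetical numbers): with NCOPIES == 2 and one
   operand that is a loop invariant (vect_external_def), the code below
   records 2 vector_stmt copies in the loop body plus 1 vector_stmt in
   the prologue for building the invariant vector:
     inside_cost   = 2 * cost (vector_stmt)
     prologue_cost = 1 * cost (vector_stmt)
   with the per-stmt costs supplied by the target cost model.  */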
830 void
831 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
832 enum vect_def_type *dt,
833 stmt_vector_for_cost *prologue_cost_vec,
834 stmt_vector_for_cost *body_cost_vec)
836 int i;
837 int inside_cost = 0, prologue_cost = 0;
839 /* The SLP costs were already calculated during SLP tree build. */
840 if (PURE_SLP_STMT (stmt_info))
841 return;
843 /* FORNOW: Assuming maximum 2 args per stmt. */
844 for (i = 0; i < 2; i++)
845 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
846 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
847 stmt_info, 0, vect_prologue);
849 /* Pass the inside-of-loop statements to the target-specific cost model. */
850 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
851 stmt_info, 0, vect_body);
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_NOTE, vect_location,
855 "vect_model_simple_cost: inside_cost = %d, "
856 "prologue_cost = %d .\n", inside_cost, prologue_cost);
860 /* Model cost for type demotion and promotion operations. PWR is normally
861 zero for single-step promotions and demotions. It will be one if
862 two-step promotion/demotion is required, and so on. Each additional
863 step doubles the number of instructions required. */
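/* Worked example (hypothetical): for a two-step promotion (PWR == 1) the
   loop below adds vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote stmts to the body cost, while the corresponding
   two-step demotion adds vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3.  */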
865 static void
866 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
867 enum vect_def_type *dt, int pwr)
869 int i, tmp;
870 int inside_cost = 0, prologue_cost = 0;
871 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
872 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
873 void *target_cost_data;
875 /* The SLP costs were already calculated during SLP tree build. */
876 if (PURE_SLP_STMT (stmt_info))
877 return;
879 if (loop_vinfo)
880 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
881 else
882 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
884 for (i = 0; i < pwr + 1; i++)
886 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
887 (i + 1) : i;
888 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
889 vec_promote_demote, stmt_info, 0,
890 vect_body);
893 /* FORNOW: Assuming maximum 2 args per stmt. */
894 for (i = 0; i < 2; i++)
895 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
896 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
897 stmt_info, 0, vect_prologue);
899 if (dump_enabled_p ())
900 dump_printf_loc (MSG_NOTE, vect_location,
901 "vect_model_promotion_demotion_cost: inside_cost = %d, "
902 "prologue_cost = %d .\n", inside_cost, prologue_cost);
905 /* Function vect_cost_group_size
907 For grouped load or store, return the group_size only if it is the first
908 load or store of a group, else return 1. This ensures that group size is
909 only returned once per group. */
911 static int
912 vect_cost_group_size (stmt_vec_info stmt_info)
914 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
916 if (first_stmt == STMT_VINFO_STMT (stmt_info))
917 return GROUP_SIZE (stmt_info);
919 return 1;
923 /* Function vect_model_store_cost
925 Models cost for stores. In the case of grouped accesses, one access
926 has the overhead of the grouped access attributed to it. */
928 void
929 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
930 bool store_lanes_p, enum vect_def_type dt,
931 slp_tree slp_node,
932 stmt_vector_for_cost *prologue_cost_vec,
933 stmt_vector_for_cost *body_cost_vec)
935 int group_size;
936 unsigned int inside_cost = 0, prologue_cost = 0;
937 struct data_reference *first_dr;
938 gimple first_stmt;
940 /* The SLP costs were already calculated during SLP tree build. */
941 if (PURE_SLP_STMT (stmt_info))
942 return;
944 if (dt == vect_constant_def || dt == vect_external_def)
945 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
946 stmt_info, 0, vect_prologue);
948 /* Grouped access? */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
951 if (slp_node)
953 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
954 group_size = 1;
956 else
958 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
959 group_size = vect_cost_group_size (stmt_info);
962 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
964 /* Not a grouped access. */
965 else
967 group_size = 1;
968 first_dr = STMT_VINFO_DATA_REF (stmt_info);
971 /* We assume that the cost of a single store-lanes instruction is
972 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
973 access is instead being provided by a permute-and-store operation,
974 include the cost of the permutes. */
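  /* Worked example (hypothetical): with GROUP_SIZE == 4 and NCOPIES == 2,
     the interleaving scheme below costs 2 * log2 (4) * 4 = 16 vec_perm
     stmts on top of the stores themselves.  */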
975 if (!store_lanes_p && group_size > 1)
977 /* Uses a high and low interleave operation for each needed permute. */
979 int nstmts = ncopies * exact_log2 (group_size) * group_size;
980 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
981 stmt_info, 0, vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: strided group_size = %d .\n",
986 group_size);
989 /* Costs of the stores. */
990 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_NOTE, vect_location,
994 "vect_model_store_cost: inside_cost = %d, "
995 "prologue_cost = %d .\n", inside_cost, prologue_cost);
999 /* Calculate cost of DR's memory access. */
1000 void
1001 vect_get_store_cost (struct data_reference *dr, int ncopies,
1002 unsigned int *inside_cost,
1003 stmt_vector_for_cost *body_cost_vec)
1005 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1006 gimple stmt = DR_STMT (dr);
1007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1009 switch (alignment_support_scheme)
1011 case dr_aligned:
1013 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1014 vector_store, stmt_info, 0,
1015 vect_body);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: aligned.\n");
1020 break;
1023 case dr_unaligned_supported:
1025 /* Here, we assign an additional cost for the unaligned store. */
1026 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1027 unaligned_store, stmt_info,
1028 DR_MISALIGNMENT (dr), vect_body);
1029 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_NOTE, vect_location,
1031 "vect_model_store_cost: unaligned supported by "
1032 "hardware.\n");
1033 break;
1036 case dr_unaligned_unsupported:
1038 *inside_cost = VECT_MAX_COST;
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1042 "vect_model_store_cost: unsupported access.\n");
1043 break;
1046 default:
1047 gcc_unreachable ();
1052 /* Function vect_model_load_cost
1054 Models cost for loads. In the case of grouped accesses, the last access
1055 has the overhead of the grouped access attributed to it. Since unaligned
1056 accesses are supported for loads, we also account for the costs of the
1057 access scheme chosen. */
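/* Cost sketch for the strided-load path below (hypothetical numbers):
   with a 4-element vectype and NCOPIES == 2, STMT_VINFO_STRIDE_LOAD_P
   accounts for 2 * 4 = 8 scalar_load stmts plus 2 vec_construct stmts
   to assemble the loaded elements into vectors.  */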
1059 void
1060 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1061 bool load_lanes_p, slp_tree slp_node,
1062 stmt_vector_for_cost *prologue_cost_vec,
1063 stmt_vector_for_cost *body_cost_vec)
1065 int group_size;
1066 gimple first_stmt;
1067 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1068 unsigned int inside_cost = 0, prologue_cost = 0;
1070 /* The SLP costs were already calculated during SLP tree build. */
1071 if (PURE_SLP_STMT (stmt_info))
1072 return;
1074 /* Grouped accesses? */
1075 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1076 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1078 group_size = vect_cost_group_size (stmt_info);
1079 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1081 /* Not a grouped access. */
1082 else
1084 group_size = 1;
1085 first_dr = dr;
1088 /* We assume that the cost of a single load-lanes instruction is
1089 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1090 access is instead being provided by a load-and-permute operation,
1091 include the cost of the permutes. */
1092 if (!load_lanes_p && group_size > 1)
1094 /* Uses even and odd extract operations for each needed permute. */
1095 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1096 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1097 stmt_info, 0, vect_body);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE, vect_location,
1101 "vect_model_load_cost: strided group_size = %d .\n",
1102 group_size);
1105 /* The loads themselves. */
1106 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1108 /* N scalar loads plus gathering them into a vector. */
1109 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1110 inside_cost += record_stmt_cost (body_cost_vec,
1111 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1112 scalar_load, stmt_info, 0, vect_body);
1113 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1114 stmt_info, 0, vect_body);
1116 else
1117 vect_get_load_cost (first_dr, ncopies,
1118 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1119 || group_size > 1 || slp_node),
1120 &inside_cost, &prologue_cost,
1121 prologue_cost_vec, body_cost_vec, true);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE, vect_location,
1125 "vect_model_load_cost: inside_cost = %d, "
1126 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1130 /* Calculate cost of DR's memory access. */
1131 void
1132 vect_get_load_cost (struct data_reference *dr, int ncopies,
1133 bool add_realign_cost, unsigned int *inside_cost,
1134 unsigned int *prologue_cost,
1135 stmt_vector_for_cost *prologue_cost_vec,
1136 stmt_vector_for_cost *body_cost_vec,
1137 bool record_prologue_costs)
1139 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1140 gimple stmt = DR_STMT (dr);
1141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1143 switch (alignment_support_scheme)
1145 case dr_aligned:
1147 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1148 stmt_info, 0, vect_body);
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_NOTE, vect_location,
1152 "vect_model_load_cost: aligned.\n");
1154 break;
1156 case dr_unaligned_supported:
1158 /* Here, we assign an additional cost for the unaligned load. */
1159 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1160 unaligned_load, stmt_info,
1161 DR_MISALIGNMENT (dr), vect_body);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: unaligned supported by "
1166 "hardware.\n");
1168 break;
1170 case dr_explicit_realign:
1172 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1173 vector_load, stmt_info, 0, vect_body);
1174 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1175 vec_perm, stmt_info, 0, vect_body);
1177 /* FIXME: If the misalignment remains fixed across the iterations of
1178 the containing loop, the following cost should be added to the
1179 prologue costs. */
1180 if (targetm.vectorize.builtin_mask_for_load)
1181 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1182 stmt_info, 0, vect_body);
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_NOTE, vect_location,
1186 "vect_model_load_cost: explicit realign\n");
1188 break;
1190 case dr_explicit_realign_optimized:
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: unaligned software "
1195 "pipelined.\n");
1197 /* Unaligned software pipeline has a load of an address, an initial
1198 load, and possibly a mask operation to "prime" the loop. However,
1199 if this is an access in a group of loads, which provide grouped
1200 access, then the above cost should only be considered for one
1201 access in the group. Inside the loop, there is a load op
1202 and a realignment op. */
1204 if (add_realign_cost && record_prologue_costs)
1206 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1207 vector_stmt, stmt_info,
1208 0, vect_prologue);
1209 if (targetm.vectorize.builtin_mask_for_load)
1210 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1211 vector_stmt, stmt_info,
1212 0, vect_prologue);
1215 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1216 stmt_info, 0, vect_body);
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1218 stmt_info, 0, vect_body);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: explicit realign optimized"
1223 "\n");
1225 break;
1228 case dr_unaligned_unsupported:
1230 *inside_cost = VECT_MAX_COST;
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1234 "vect_model_load_cost: unsupported access.\n");
1235 break;
1238 default:
1239 gcc_unreachable ();
1243 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1244 the loop preheader for the vectorized stmt STMT. */
1246 static void
1247 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1249 if (gsi)
1250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1251 else
1253 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1254 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1256 if (loop_vinfo)
1258 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1259 basic_block new_bb;
1260 edge pe;
1262 if (nested_in_vect_loop_p (loop, stmt))
1263 loop = loop->inner;
1265 pe = loop_preheader_edge (loop);
1266 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1267 gcc_assert (!new_bb);
1269 else
1271 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1272 basic_block bb;
1273 gimple_stmt_iterator gsi_bb_start;
1275 gcc_assert (bb_vinfo);
1276 bb = BB_VINFO_BB (bb_vinfo);
1277 gsi_bb_start = gsi_after_labels (bb);
1278 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1282 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "created new init_stmt: ");
1286 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1287 dump_printf (MSG_NOTE, "\n");
1291 /* Function vect_init_vector.
1293 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1294 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1295 a vector type, a vector with all elements equal to VAL is created first.
1296 Place the initialization at GSI if it is not NULL. Otherwise, place the
1297 initialization at the loop preheader.
1298 Return the DEF of INIT_STMT.
1299 It will be used in the vectorization of STMT. */
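/* For illustration (hypothetical): with VAL == 5, a scalar int, and TYPE
   a 4-element integer vector type, this emits something like

     vect_cst_.N = { 5, 5, 5, 5 };

   at GSI (or in the loop preheader when GSI is NULL) and returns the
   SSA name defined by that stmt.  */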
1301 tree
1302 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1304 tree new_var;
1305 gimple init_stmt;
1306 tree vec_oprnd;
1307 tree new_temp;
1309 if (TREE_CODE (type) == VECTOR_TYPE
1310 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1312 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1314 if (CONSTANT_CLASS_P (val))
1315 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1316 else
1318 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1319 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1320 new_temp, val,
1321 NULL_TREE);
1322 vect_init_vector_1 (stmt, init_stmt, gsi);
1323 val = new_temp;
1326 val = build_vector_from_val (type, val);
1329 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1330 init_stmt = gimple_build_assign (new_var, val);
1331 new_temp = make_ssa_name (new_var, init_stmt);
1332 gimple_assign_set_lhs (init_stmt, new_temp);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 vec_oprnd = gimple_assign_lhs (init_stmt);
1335 return vec_oprnd;
1339 /* Function vect_get_vec_def_for_operand.
1341 OP is an operand in STMT. This function returns a (vector) def that will be
1342 used in the vectorized stmt for STMT.
1344 In the case that OP is an SSA_NAME defined in the loop,
1345 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1347 In case OP is an invariant or constant, a new stmt that creates a vector def
1348 needs to be introduced. */
1350 tree
1351 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1353 tree vec_oprnd;
1354 gimple vec_stmt;
1355 gimple def_stmt;
1356 stmt_vec_info def_stmt_info = NULL;
1357 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1358 unsigned int nunits;
1359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1360 tree def;
1361 enum vect_def_type dt;
1362 bool is_simple_use;
1363 tree vector_type;
1365 if (dump_enabled_p ())
1367 dump_printf_loc (MSG_NOTE, vect_location,
1368 "vect_get_vec_def_for_operand: ");
1369 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1370 dump_printf (MSG_NOTE, "\n");
1373 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1374 &def_stmt, &def, &dt);
1375 gcc_assert (is_simple_use);
1376 if (dump_enabled_p ())
1378 int loc_printed = 0;
1379 if (def)
1381 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1382 loc_printed = 1;
1383 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1384 dump_printf (MSG_NOTE, "\n");
1386 if (def_stmt)
1388 if (loc_printed)
1389 dump_printf (MSG_NOTE, " def_stmt = ");
1390 else
1391 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1393 dump_printf (MSG_NOTE, "\n");
1397 switch (dt)
1399 /* Case 1: operand is a constant. */
1400 case vect_constant_def:
1402 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1403 gcc_assert (vector_type);
1404 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1406 if (scalar_def)
1407 *scalar_def = op;
1409 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1410 if (dump_enabled_p ())
1411 dump_printf_loc (MSG_NOTE, vect_location,
1412 "Create vector_cst. nunits = %d\n", nunits);
1414 return vect_init_vector (stmt, op, vector_type, NULL);
1417 /* Case 2: operand is defined outside the loop - loop invariant. */
1418 case vect_external_def:
1420 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1421 gcc_assert (vector_type);
1423 if (scalar_def)
1424 *scalar_def = def;
1426 /* Create 'vec_inv = {inv,inv,..,inv}' */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1430 return vect_init_vector (stmt, def, vector_type, NULL);
1433 /* Case 3: operand is defined inside the loop. */
1434 case vect_internal_def:
1436 if (scalar_def)
1437 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1439 /* Get the def from the vectorized stmt. */
1440 def_stmt_info = vinfo_for_stmt (def_stmt);
1442 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1443 /* Get vectorized pattern statement. */
1444 if (!vec_stmt
1445 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1446 && !STMT_VINFO_RELEVANT (def_stmt_info))
1447 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1448 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1449 gcc_assert (vec_stmt);
1450 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1451 vec_oprnd = PHI_RESULT (vec_stmt);
1452 else if (is_gimple_call (vec_stmt))
1453 vec_oprnd = gimple_call_lhs (vec_stmt);
1454 else
1455 vec_oprnd = gimple_assign_lhs (vec_stmt);
1456 return vec_oprnd;
1459 /* Case 4: operand is defined by a loop header phi - reduction */
1460 case vect_reduction_def:
1461 case vect_double_reduction_def:
1462 case vect_nested_cycle:
1464 struct loop *loop;
1466 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1467 loop = (gimple_bb (def_stmt))->loop_father;
1469 /* Get the def before the loop */
1470 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1471 return get_initial_def_for_reduction (stmt, op, scalar_def);
1474 /* Case 5: operand is defined by loop-header phi - induction. */
1475 case vect_induction_def:
1477 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info = vinfo_for_stmt (def_stmt);
1481 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1482 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1483 vec_oprnd = PHI_RESULT (vec_stmt);
1484 else
1485 vec_oprnd = gimple_get_lhs (vec_stmt);
1486 return vec_oprnd;
1489 default:
1490 gcc_unreachable ();
1495 /* Function vect_get_vec_def_for_stmt_copy
1497 Return a vector-def for an operand. This function is used when the
1498 vectorized stmt to be created (by the caller to this function) is a "copy"
1499 created in case the vectorized result cannot fit in one vector, and several
1500 copies of the vector-stmt are required. In this case the vector-def is
1501 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1502 of the stmt that defines VEC_OPRND.
1503 DT is the type of the vector def VEC_OPRND.
1505 Context:
1506 In case the vectorization factor (VF) is bigger than the number
1507 of elements that can fit in a vectype (nunits), we have to generate
1508 more than one vector stmt to vectorize the scalar stmt. This situation
1509 arises when there are multiple data-types operated upon in the loop; the
1510 smallest data-type determines the VF, and as a result, when vectorizing
1511 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1512 vector stmt (each computing a vector of 'nunits' results, and together
1513 computing 'VF' results in each iteration). This function is called when
1514 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1515 which VF=16 and nunits=4, so the number of copies required is 4):
1517    scalar stmt:          vectorized into:              STMT_VINFO_RELATED_STMT
1519    S1: x = load          VS1.0:  vx.0 = memref0          VS1.1
1520                          VS1.1:  vx.1 = memref1          VS1.2
1521                          VS1.2:  vx.2 = memref2          VS1.3
1522                          VS1.3:  vx.3 = memref3
1524    S2: z = x + ...       VSnew.0:  vz0 = vx.0 + ...      VSnew.1
1525                          VSnew.1:  vz1 = vx.1 + ...      VSnew.2
1526                          VSnew.2:  vz2 = vx.2 + ...      VSnew.3
1527                          VSnew.3:  vz3 = vx.3 + ...
1529 The vectorization of S1 is explained in vectorizable_load.
1530 The vectorization of S2:
1531 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1532 the function 'vect_get_vec_def_for_operand' is called to
1533 get the relevant vector-def for each operand of S2. For operand x it
1534 returns the vector-def 'vx.0'.
1536 To create the remaining copies of the vector-stmt (VSnew.j), this
1537 function is called to get the relevant vector-def for each operand. It is
1538 obtained from the respective VS1.j stmt, which is recorded in the
1539 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1541 For example, to obtain the vector-def 'vx.1' in order to create the
1542 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1543 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1544 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1545 and return its def ('vx.1').
1546 Overall, to create the above sequence this function will be called 3 times:
1547 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1548 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1549 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1551 tree
1552 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1554 gimple vec_stmt_for_operand;
1555 stmt_vec_info def_stmt_info;
1557 /* Do nothing; can reuse same def. */
1558 if (dt == vect_external_def || dt == vect_constant_def )
1559 return vec_oprnd;
1561 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1562 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1563 gcc_assert (def_stmt_info);
1564 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1565 gcc_assert (vec_stmt_for_operand);
1566 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1567 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1568 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1569 else
1570 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1571 return vec_oprnd;
1575 /* Get vectorized definitions for the operands to create a copy of an original
1576 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1578 static void
1579 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1580 vec<tree> *vec_oprnds0,
1581 vec<tree> *vec_oprnds1)
1583 tree vec_oprnd = vec_oprnds0->pop ();
1585 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1586 vec_oprnds0->quick_push (vec_oprnd);
1588 if (vec_oprnds1 && vec_oprnds1->length ())
1590 vec_oprnd = vec_oprnds1->pop ();
1591 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1592 vec_oprnds1->quick_push (vec_oprnd);
1597 /* Get vectorized definitions for OP0 and OP1.
1598 REDUC_INDEX is the index of reduction operand in case of reduction,
1599 and -1 otherwise. */
1601 void
1602 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1603 vec<tree> *vec_oprnds0,
1604 vec<tree> *vec_oprnds1,
1605 slp_tree slp_node, int reduc_index)
1607 if (slp_node)
1609 int nops = (op1 == NULL_TREE) ? 1 : 2;
1610 auto_vec<tree> ops (nops);
1611 auto_vec<vec<tree> > vec_defs (nops);
1613 ops.quick_push (op0);
1614 if (op1)
1615 ops.quick_push (op1);
1617 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1619 *vec_oprnds0 = vec_defs[0];
1620 if (op1)
1621 *vec_oprnds1 = vec_defs[1];
1623 else
1625 tree vec_oprnd;
1627 vec_oprnds0->create (1);
1628 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1629 vec_oprnds0->quick_push (vec_oprnd);
1631 if (op1)
1633 vec_oprnds1->create (1);
1634 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1635 vec_oprnds1->quick_push (vec_oprnd);
1641 /* Function vect_finish_stmt_generation.
1643 Insert a new stmt. */
1645 void
1646 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1647 gimple_stmt_iterator *gsi)
1649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1658 gimple at_stmt = gsi_stmt (*gsi);
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1683 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1684 bb_vinfo));
1686 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1690 dump_printf (MSG_NOTE, "\n");
1693 gimple_set_location (vec_stmt, gimple_location (stmt));
1696 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1697 a function declaration if the target has a vectorized version
1698 of the function, or NULL_TREE if the function cannot be vectorized. */
1700 tree
1701 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1703 tree fndecl = gimple_call_fndecl (call);
1705 /* We only handle functions that do not read or clobber memory -- i.e.
1706 const or novops ones. */
1707 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1708 return NULL_TREE;
1710 if (!fndecl
1711 || TREE_CODE (fndecl) != FUNCTION_DECL
1712 || !DECL_BUILT_IN (fndecl))
1713 return NULL_TREE;
1715 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1716 vectype_in);
1720 static tree permute_vec_elements (tree, tree, tree, gimple,
1721 gimple_stmt_iterator *);
1724 /* Function vectorizable_mask_load_store.
1726 Check if STMT performs a conditional load or store that can be vectorized.
1727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1728 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
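/* For illustration (hypothetical source loop): a conditional access like

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   reaches this function as internal calls of the form
     MASK_LOAD (addr, align, mask)
     MASK_STORE (addr, align, mask, value)
   which is why the mask is taken from call argument 2 and the stored
   value from argument 3 below.  */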
1731 static bool
1732 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1733 gimple *vec_stmt, slp_tree slp_node)
1735 tree vec_dest = NULL;
1736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1737 stmt_vec_info prev_stmt_info;
1738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1739 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1740 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1741 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1742 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1743 tree elem_type;
1744 gimple new_stmt;
1745 tree dummy;
1746 tree dataref_ptr = NULL_TREE;
1747 gimple ptr_incr;
1748 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1749 int ncopies;
1750 int i, j;
1751 bool inv_p;
1752 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1753 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1754 int gather_scale = 1;
1755 enum vect_def_type gather_dt = vect_unknown_def_type;
1756 bool is_store;
1757 tree mask;
1758 gimple def_stmt;
1759 tree def;
1760 enum vect_def_type dt;
1762 if (slp_node != NULL)
1763 return false;
1765 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1766 gcc_assert (ncopies >= 1);
1768 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1769 mask = gimple_call_arg (stmt, 2);
1770 if (TYPE_PRECISION (TREE_TYPE (mask))
1771 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1772 return false;
1774 /* FORNOW. This restriction should be relaxed. */
1775 if (nested_in_vect_loop && ncopies > 1)
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "multiple types in nested loop.");
1780 return false;
1783 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1784 return false;
1786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1787 return false;
1789 if (!STMT_VINFO_DATA_REF (stmt_info))
1790 return false;
1792 elem_type = TREE_TYPE (vectype);
1794 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1795 return false;
1797 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1798 return false;
1800 if (STMT_VINFO_GATHER_P (stmt_info))
1802 gimple def_stmt;
1803 tree def;
1804 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1805 &gather_off, &gather_scale);
1806 gcc_assert (gather_decl);
1807 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1808 &def_stmt, &def, &gather_dt,
1809 &gather_off_vectype))
1811 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 "gather index use not simple.");
1814 return false;
1817 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1818 tree masktype
1819 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1820 if (TREE_CODE (masktype) == INTEGER_TYPE)
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1824 "masked gather with integer mask not supported.");
1825 return false;
1828 else if (tree_int_cst_compare (nested_in_vect_loop
1829 ? STMT_VINFO_DR_STEP (stmt_info)
1830 : DR_STEP (dr), size_zero_node) <= 0)
1831 return false;
1832 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1833 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1834 return false;
1836 if (TREE_CODE (mask) != SSA_NAME)
1837 return false;
1839 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1840 &def_stmt, &def, &dt))
1841 return false;
1843 if (is_store)
1845 tree rhs = gimple_call_arg (stmt, 3);
1846 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1847 &def_stmt, &def, &dt))
1848 return false;
1851 if (!vec_stmt) /* transformation not required. */
1853 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1854 if (is_store)
1855 vect_model_store_cost (stmt_info, ncopies, false, dt,
1856 NULL, NULL, NULL);
1857 else
1858 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1859 return true;
1862 /** Transform. **/
1864 if (STMT_VINFO_GATHER_P (stmt_info))
1866 tree vec_oprnd0 = NULL_TREE, op;
1867 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1868 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1869 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1870 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1871 tree mask_perm_mask = NULL_TREE;
1872 edge pe = loop_preheader_edge (loop);
1873 gimple_seq seq;
1874 basic_block new_bb;
1875 enum { NARROW, NONE, WIDEN } modifier;
1876 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1878 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1879 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1880 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1881 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1882 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1883 scaletype = TREE_VALUE (arglist);
1884 gcc_checking_assert (types_compatible_p (srctype, rettype)
1885 && types_compatible_p (srctype, masktype));
1887 if (nunits == gather_off_nunits)
1888 modifier = NONE;
1889 else if (nunits == gather_off_nunits / 2)
1891 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1892 modifier = WIDEN;
1894 for (i = 0; i < gather_off_nunits; ++i)
1895 sel[i] = i | nunits;
1897 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1898 gcc_assert (perm_mask != NULL_TREE);
1900 else if (nunits == gather_off_nunits * 2)
1902 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1903 modifier = NARROW;
1905 for (i = 0; i < nunits; ++i)
1906 sel[i] = i < gather_off_nunits
1907 ? i : i + nunits - gather_off_nunits;
1909 perm_mask = vect_gen_perm_mask (vectype, sel);
1910 gcc_assert (perm_mask != NULL_TREE);
1911 ncopies *= 2;
1912 for (i = 0; i < nunits; ++i)
1913 sel[i] = i | gather_off_nunits;
1914 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1915 gcc_assert (mask_perm_mask != NULL_TREE);
1917 else
1918 gcc_unreachable ();
1920 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1922 ptr = fold_convert (ptrtype, gather_base);
1923 if (!is_gimple_min_invariant (ptr))
1925 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1926 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1927 gcc_assert (!new_bb);
1930 scale = build_int_cst (scaletype, gather_scale);
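/* Each iteration of the loop below emits one call to GATHER_DECL: the
   offset and mask operands are fetched or permuted for copy J, view
   converted to the builtin's index and mask types when those differ, and
   the call result is view converted back to VECTYPE if the builtin returns
   a different (compatible) vector type.  For NARROW, even copies only
   stash half a result in PREV_RES; the following odd copy merges both
   halves with PERM_MASK before the stmt is recorded in the copy chain.  */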
1932 prev_stmt_info = NULL;
1933 for (j = 0; j < ncopies; ++j)
1935 if (modifier == WIDEN && (j & 1))
1936 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1937 perm_mask, stmt, gsi);
1938 else if (j == 0)
1939 op = vec_oprnd0
1940 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1941 else
1942 op = vec_oprnd0
1943 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1945 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1947 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1948 == TYPE_VECTOR_SUBPARTS (idxtype));
1949 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1950 var = make_ssa_name (var, NULL);
1951 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1952 new_stmt
1953 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1954 op, NULL_TREE);
1955 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1956 op = var;
1959 if (mask_perm_mask && (j & 1))
1960 mask_op = permute_vec_elements (mask_op, mask_op,
1961 mask_perm_mask, stmt, gsi);
1962 else
1964 if (j == 0)
1965 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1966 else
1968 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1969 &def_stmt, &def, &dt);
1970 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1973 mask_op = vec_mask;
1974 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1976 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1977 == TYPE_VECTOR_SUBPARTS (masktype));
1978 var = vect_get_new_vect_var (masktype, vect_simple_var,
1979 NULL);
1980 var = make_ssa_name (var, NULL);
1981 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1982 new_stmt
1983 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1984 mask_op, NULL_TREE);
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 mask_op = var;
1990 new_stmt
1991 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1992 scale);
1994 if (!useless_type_conversion_p (vectype, rettype))
1996 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1997 == TYPE_VECTOR_SUBPARTS (rettype));
1998 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
1999 op = make_ssa_name (var, new_stmt);
2000 gimple_call_set_lhs (new_stmt, op);
2001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2002 var = make_ssa_name (vec_dest, NULL);
2003 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2004 new_stmt
2005 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2006 NULL_TREE);
2008 else
2010 var = make_ssa_name (vec_dest, new_stmt);
2011 gimple_call_set_lhs (new_stmt, var);
2014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2016 if (modifier == NARROW)
2018 if ((j & 1) == 0)
2020 prev_res = var;
2021 continue;
2023 var = permute_vec_elements (prev_res, var,
2024 perm_mask, stmt, gsi);
2025 new_stmt = SSA_NAME_DEF_STMT (var);
2028 if (prev_stmt_info == NULL)
2029 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2030 else
2031 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2032 prev_stmt_info = vinfo_for_stmt (new_stmt);
2034 return true;
2036 else if (is_store)
2038 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2039 prev_stmt_info = NULL;
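/* Masked store: the first copy creates the initial vectorized rhs and mask
   defs plus the data-ref pointer; later copies take def copies and bump the
   pointer by the vector size.  Each copy records the known alignment on the
   pointer and emits an IFN_MASK_STORE call.  */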
2040 for (i = 0; i < ncopies; i++)
2042 unsigned align, misalign;
2044 if (i == 0)
2046 tree rhs = gimple_call_arg (stmt, 3);
2047 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2048 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2049 /* We should have caught mismatched types earlier. */
2050 gcc_assert (useless_type_conversion_p (vectype,
2051 TREE_TYPE (vec_rhs)));
2052 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2053 NULL_TREE, &dummy, gsi,
2054 &ptr_incr, false, &inv_p);
2055 gcc_assert (!inv_p);
2057 else
2059 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2060 &def, &dt);
2061 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2062 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2063 &def, &dt);
2064 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2065 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2066 TYPE_SIZE_UNIT (vectype));
2069 align = TYPE_ALIGN_UNIT (vectype);
2070 if (aligned_access_p (dr))
2071 misalign = 0;
2072 else if (DR_MISALIGNMENT (dr) == -1)
2074 align = TYPE_ALIGN_UNIT (elem_type);
2075 misalign = 0;
2077 else
2078 misalign = DR_MISALIGNMENT (dr);
2079 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2080 misalign);
2081 new_stmt
2082 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2083 gimple_call_arg (stmt, 1),
2084 vec_mask, vec_rhs);
2085 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2086 if (i == 0)
2087 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2088 else
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2090 prev_stmt_info = vinfo_for_stmt (new_stmt);
2093 else
2095 tree vec_mask = NULL_TREE;
2096 prev_stmt_info = NULL;
2097 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
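/* Masked load: analogous to the store case above, but each copy emits an
   IFN_MASK_LOAD call whose result is assigned to a fresh SSA name based on
   VEC_DEST.  */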
2098 for (i = 0; i < ncopies; i++)
2100 unsigned align, misalign;
2102 if (i == 0)
2104 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2105 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2106 NULL_TREE, &dummy, gsi,
2107 &ptr_incr, false, &inv_p);
2108 gcc_assert (!inv_p);
2110 else
2112 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2113 &def, &dt);
2114 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2115 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2116 TYPE_SIZE_UNIT (vectype));
2119 align = TYPE_ALIGN_UNIT (vectype);
2120 if (aligned_access_p (dr))
2121 misalign = 0;
2122 else if (DR_MISALIGNMENT (dr) == -1)
2124 align = TYPE_ALIGN_UNIT (elem_type);
2125 misalign = 0;
2127 else
2128 misalign = DR_MISALIGNMENT (dr);
2129 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2130 misalign);
2131 new_stmt
2132 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2133 gimple_call_arg (stmt, 1),
2134 vec_mask);
2135 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2137 if (i == 0)
2138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2139 else
2140 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2141 prev_stmt_info = vinfo_for_stmt (new_stmt);
2145 return true;
2149 /* Function vectorizable_call.
2151 Check if STMT performs a function call that can be vectorized.
2152 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2153 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2154 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2156 static bool
2157 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2158 slp_tree slp_node)
2160 tree vec_dest;
2161 tree scalar_dest;
2162 tree op, type;
2163 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2165 tree vectype_out, vectype_in;
2166 int nunits_in;
2167 int nunits_out;
2168 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2169 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2170 tree fndecl, new_temp, def, rhs_type;
2171 gimple def_stmt;
2172 enum vect_def_type dt[3]
2173 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2174 gimple new_stmt = NULL;
2175 int ncopies, j;
2176 vec<tree> vargs = vNULL;
2177 enum { NARROW, NONE, WIDEN } modifier;
2178 size_t i, nargs;
2179 tree lhs;
2181 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2182 return false;
2184 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2185 return false;
2187 /* Is STMT a vectorizable call? */
2188 if (!is_gimple_call (stmt))
2189 return false;
2191 if (gimple_call_internal_p (stmt)
2192 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2193 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2194 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2195 slp_node);
2197 if (gimple_call_lhs (stmt) == NULL_TREE
2198 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2199 return false;
2201 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2203 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2205 /* Process function arguments. */
2206 rhs_type = NULL_TREE;
2207 vectype_in = NULL_TREE;
2208 nargs = gimple_call_num_args (stmt);
2210 /* Bail out if the function has more than three arguments; we do not have
2211 interesting builtin functions to vectorize with more than two arguments,
2212 except for fma. Zero arguments is also not good. */
2213 if (nargs == 0 || nargs > 3)
2214 return false;
2216 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2217 if (gimple_call_internal_p (stmt)
2218 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2220 nargs = 0;
2221 rhs_type = unsigned_type_node;
2224 for (i = 0; i < nargs; i++)
2226 tree opvectype;
2228 op = gimple_call_arg (stmt, i);
2230 /* We can only handle calls with arguments of the same type. */
2231 if (rhs_type
2232 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2234 if (dump_enabled_p ())
2235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2236 "argument types differ.\n");
2237 return false;
2239 if (!rhs_type)
2240 rhs_type = TREE_TYPE (op);
2242 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2243 &def_stmt, &def, &dt[i], &opvectype))
2245 if (dump_enabled_p ())
2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2247 "use not simple.\n");
2248 return false;
2251 if (!vectype_in)
2252 vectype_in = opvectype;
2253 else if (opvectype
2254 && opvectype != vectype_in)
2256 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2258 "argument vector types differ.\n");
2259 return false;
2262 /* If all arguments are external or constant defs, use a vector type with
2263 the same size as the output vector type. */
2264 if (!vectype_in)
2265 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2266 if (vec_stmt)
2267 gcc_assert (vectype_in);
2268 if (!vectype_in)
2270 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2273 "no vectype for scalar type ");
2274 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2275 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2278 return false;
2281 /* FORNOW */
2282 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2283 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
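/* Classify the call by relative lane counts: NARROW when the output vector
   holds twice as many elements as the input (each vectorized call then
   consumes two input vectors per argument), NONE when they match, and WIDEN
   when the input holds twice as many (rejected below, since no current
   target implements it).  Other ratios are not handled.  */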
2284 if (nunits_in == nunits_out / 2)
2285 modifier = NARROW;
2286 else if (nunits_out == nunits_in)
2287 modifier = NONE;
2288 else if (nunits_out == nunits_in / 2)
2289 modifier = WIDEN;
2290 else
2291 return false;
2293 /* For now, we only vectorize functions if a target-specific builtin
2294 is available. TODO -- in some cases, it might be profitable to
2295 insert the calls for pieces of the vector, in order to be able
2296 to vectorize other operations in the loop. */
2297 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2298 if (fndecl == NULL_TREE)
2300 if (gimple_call_internal_p (stmt)
2301 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2302 && !slp_node
2303 && loop_vinfo
2304 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2305 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2306 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2307 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2309 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2310 { 0, 1, 2, ... vf - 1 } vector. */
2311 gcc_assert (nargs == 0);
2313 else
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "function is not vectorizable.\n");
2318 return false;
2322 gcc_assert (!gimple_vuse (stmt));
2324 if (slp_node || PURE_SLP_STMT (stmt_info))
2325 ncopies = 1;
2326 else if (modifier == NARROW)
2327 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2328 else
2329 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2331 /* Sanity check: make sure that at least one copy of the vectorized stmt
2332 needs to be generated. */
2333 gcc_assert (ncopies >= 1);
2335 if (!vec_stmt) /* transformation not required. */
2337 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2340 "\n");
2341 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2342 return true;
2345 /** Transform. **/
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2350 /* Handle def. */
2351 scalar_dest = gimple_call_lhs (stmt);
2352 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2354 prev_stmt_info = NULL;
2355 switch (modifier)
2357 case NONE:
2358 for (j = 0; j < ncopies; ++j)
2360 /* Build argument list for the vectorized call. */
2361 if (j == 0)
2362 vargs.create (nargs);
2363 else
2364 vargs.truncate (0);
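/* For SLP, fetch the vectorized defs of all arguments at once and emit one
   call per SLP vector stmt; otherwise build the argument list for this
   copy, reusing the previous copy's defs when J > 0.  */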
2366 if (slp_node)
2368 auto_vec<vec<tree> > vec_defs (nargs);
2369 vec<tree> vec_oprnds0;
2371 for (i = 0; i < nargs; i++)
2372 vargs.quick_push (gimple_call_arg (stmt, i));
2373 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2374 vec_oprnds0 = vec_defs[0];
2376 /* Arguments are ready. Create the new vector stmt. */
2377 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2379 size_t k;
2380 for (k = 0; k < nargs; k++)
2382 vec<tree> vec_oprndsk = vec_defs[k];
2383 vargs[k] = vec_oprndsk[i];
2385 new_stmt = gimple_build_call_vec (fndecl, vargs);
2386 new_temp = make_ssa_name (vec_dest, new_stmt);
2387 gimple_call_set_lhs (new_stmt, new_temp);
2388 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2389 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2392 for (i = 0; i < nargs; i++)
2394 vec<tree> vec_oprndsi = vec_defs[i];
2395 vec_oprndsi.release ();
2397 continue;
2400 for (i = 0; i < nargs; i++)
2402 op = gimple_call_arg (stmt, i);
2403 if (j == 0)
2404 vec_oprnd0
2405 = vect_get_vec_def_for_operand (op, stmt, NULL);
2406 else
2408 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2409 vec_oprnd0
2410 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2413 vargs.quick_push (vec_oprnd0);
2416 if (gimple_call_internal_p (stmt)
2417 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2419 tree *v = XALLOCAVEC (tree, nunits_out);
2420 int k;
2421 for (k = 0; k < nunits_out; ++k)
2422 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2423 tree cst = build_vector (vectype_out, v);
2424 tree new_var
2425 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2426 gimple init_stmt = gimple_build_assign (new_var, cst);
2427 new_temp = make_ssa_name (new_var, init_stmt);
2428 gimple_assign_set_lhs (init_stmt, new_temp);
2429 vect_init_vector_1 (stmt, init_stmt, NULL);
2430 new_temp = make_ssa_name (vec_dest, NULL);
2431 new_stmt = gimple_build_assign (new_temp,
2432 gimple_assign_lhs (init_stmt));
2434 else
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2442 if (j == 0)
2443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2444 else
2445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2447 prev_stmt_info = vinfo_for_stmt (new_stmt);
2450 break;
2452 case NARROW:
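/* Each narrowed call consumes two consecutive vector defs per argument
   (VEC_OPRND0/VEC_OPRND1) and produces a single result vector; in the SLP
   case the per-argument def vectors are therefore walked two at a time.  */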
2453 for (j = 0; j < ncopies; ++j)
2455 /* Build argument list for the vectorized call. */
2456 if (j == 0)
2457 vargs.create (nargs * 2);
2458 else
2459 vargs.truncate (0);
2461 if (slp_node)
2463 auto_vec<vec<tree> > vec_defs (nargs);
2464 vec<tree> vec_oprnds0;
2466 for (i = 0; i < nargs; i++)
2467 vargs.quick_push (gimple_call_arg (stmt, i));
2468 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2469 vec_oprnds0 = vec_defs[0];
2471 /* Arguments are ready. Create the new vector stmt. */
2472 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2474 size_t k;
2475 vargs.truncate (0);
2476 for (k = 0; k < nargs; k++)
2478 vec<tree> vec_oprndsk = vec_defs[k];
2479 vargs.quick_push (vec_oprndsk[i]);
2480 vargs.quick_push (vec_oprndsk[i + 1]);
2482 new_stmt = gimple_build_call_vec (fndecl, vargs);
2483 new_temp = make_ssa_name (vec_dest, new_stmt);
2484 gimple_call_set_lhs (new_stmt, new_temp);
2485 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2486 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2489 for (i = 0; i < nargs; i++)
2491 vec<tree> vec_oprndsi = vec_defs[i];
2492 vec_oprndsi.release ();
2494 continue;
2497 for (i = 0; i < nargs; i++)
2499 op = gimple_call_arg (stmt, i);
2500 if (j == 0)
2502 vec_oprnd0
2503 = vect_get_vec_def_for_operand (op, stmt, NULL);
2504 vec_oprnd1
2505 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2507 else
2509 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2510 vec_oprnd0
2511 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2512 vec_oprnd1
2513 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2516 vargs.quick_push (vec_oprnd0);
2517 vargs.quick_push (vec_oprnd1);
2520 new_stmt = gimple_build_call_vec (fndecl, vargs);
2521 new_temp = make_ssa_name (vec_dest, new_stmt);
2522 gimple_call_set_lhs (new_stmt, new_temp);
2523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2525 if (j == 0)
2526 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2527 else
2528 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2530 prev_stmt_info = vinfo_for_stmt (new_stmt);
2533 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2535 break;
2537 case WIDEN:
2538 /* No current target implements this case. */
2539 return false;
2542 vargs.release ();
2544 /* The call in STMT might prevent it from being removed in dce.
2545 We, however, cannot remove it here because of the way the SSA name
2546 it defines is mapped to the new definition. So just replace the
2547 rhs of the statement with something harmless. */
2549 if (slp_node)
2550 return true;
2552 type = TREE_TYPE (scalar_dest);
2553 if (is_pattern_stmt_p (stmt_info))
2554 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2555 else
2556 lhs = gimple_call_lhs (stmt);
2557 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2558 set_vinfo_for_stmt (new_stmt, stmt_info);
2559 set_vinfo_for_stmt (stmt, NULL);
2560 STMT_VINFO_STMT (stmt_info) = new_stmt;
2561 gsi_replace (gsi, new_stmt, false);
2563 return true;
2567 struct simd_call_arg_info
2569 tree vectype;
2570 tree op;
2571 enum vect_def_type dt;
2572 HOST_WIDE_INT linear_step;
2573 unsigned int align;
2576 /* Function vectorizable_simd_clone_call.
2578 Check if STMT performs a function call that can be vectorized
2579 by calling a simd clone of the function.
2580 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2581 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2582 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2584 static bool
2585 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2586 gimple *vec_stmt, slp_tree slp_node)
2588 tree vec_dest;
2589 tree scalar_dest;
2590 tree op, type;
2591 tree vec_oprnd0 = NULL_TREE;
2592 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2593 tree vectype;
2594 unsigned int nunits;
2595 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2596 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2597 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2598 tree fndecl, new_temp, def;
2599 gimple def_stmt;
2600 gimple new_stmt = NULL;
2601 int ncopies, j;
2602 vec<simd_call_arg_info> arginfo = vNULL;
2603 vec<tree> vargs = vNULL;
2604 size_t i, nargs;
2605 tree lhs, rtype, ratype;
2606 vec<constructor_elt, va_gc> *ret_ctor_elts;
2608 /* Is STMT a vectorizable call? */
2609 if (!is_gimple_call (stmt))
2610 return false;
2612 fndecl = gimple_call_fndecl (stmt);
2613 if (fndecl == NULL_TREE)
2614 return false;
2616 struct cgraph_node *node = cgraph_get_node (fndecl);
2617 if (node == NULL || node->simd_clones == NULL)
2618 return false;
2620 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2621 return false;
2623 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2624 return false;
2626 if (gimple_call_lhs (stmt)
2627 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2628 return false;
2630 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2632 vectype = STMT_VINFO_VECTYPE (stmt_info);
2634 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2635 return false;
2637 /* FORNOW */
2638 if (slp_node || PURE_SLP_STMT (stmt_info))
2639 return false;
2641 /* Process function arguments. */
2642 nargs = gimple_call_num_args (stmt);
2644 /* Bail out if the function has zero arguments. */
2645 if (nargs == 0)
2646 return false;
2648 arginfo.create (nargs);
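/* Analyze each argument: classify its def, record the step and base of
   arguments that form a simple affine induction in the loop, and record the
   known alignment of invariant pointer arguments.  These properties drive
   the clone selection below.  */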
2650 for (i = 0; i < nargs; i++)
2652 simd_call_arg_info thisarginfo;
2653 affine_iv iv;
2655 thisarginfo.linear_step = 0;
2656 thisarginfo.align = 0;
2657 thisarginfo.op = NULL_TREE;
2659 op = gimple_call_arg (stmt, i);
2660 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2661 &def_stmt, &def, &thisarginfo.dt,
2662 &thisarginfo.vectype)
2663 || thisarginfo.dt == vect_uninitialized_def)
2665 if (dump_enabled_p ())
2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2667 "use not simple.\n");
2668 arginfo.release ();
2669 return false;
2672 if (thisarginfo.dt == vect_constant_def
2673 || thisarginfo.dt == vect_external_def)
2674 gcc_assert (thisarginfo.vectype == NULL_TREE);
2675 else
2676 gcc_assert (thisarginfo.vectype != NULL_TREE);
2678 if (thisarginfo.dt != vect_constant_def
2679 && thisarginfo.dt != vect_external_def
2680 && loop_vinfo
2681 && TREE_CODE (op) == SSA_NAME
2682 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2683 && tree_fits_shwi_p (iv.step))
2685 thisarginfo.linear_step = tree_to_shwi (iv.step);
2686 thisarginfo.op = iv.base;
2688 else if ((thisarginfo.dt == vect_constant_def
2689 || thisarginfo.dt == vect_external_def)
2690 && POINTER_TYPE_P (TREE_TYPE (op)))
2691 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2693 arginfo.quick_push (thisarginfo);
2696 unsigned int badness = 0;
2697 struct cgraph_node *bestn = NULL;
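/* Pick the best simd clone for this call unless one was already chosen
   during analysis.  Candidates whose simdlen exceeds the vectorization
   factor or whose argument count differs are skipped; otherwise a badness
   score accumulates penalties for a simdlen smaller than the VF, for
   in-branch clones, for target-reported costs and for argument-kind
   mismatches, and the lowest score wins.  For instance, with a
   vectorization factor of 8 a simdlen-4 clone starts at
   (log2 (8) - log2 (4)) * 1024 == 1024, so an otherwise equivalent
   simdlen-8 clone is preferred.  */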
2698 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2699 bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2700 else
2701 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2702 n = n->simdclone->next_clone)
2704 unsigned int this_badness = 0;
2705 if (n->simdclone->simdlen
2706 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2707 || n->simdclone->nargs != nargs)
2708 continue;
2709 if (n->simdclone->simdlen
2710 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2711 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2712 - exact_log2 (n->simdclone->simdlen)) * 1024;
2713 if (n->simdclone->inbranch)
2714 this_badness += 2048;
2715 int target_badness = targetm.simd_clone.usable (n);
2716 if (target_badness < 0)
2717 continue;
2718 this_badness += target_badness * 512;
2719 /* FORNOW: Have to add code to add the mask argument. */
2720 if (n->simdclone->inbranch)
2721 continue;
2722 for (i = 0; i < nargs; i++)
2724 switch (n->simdclone->args[i].arg_type)
2726 case SIMD_CLONE_ARG_TYPE_VECTOR:
2727 if (!useless_type_conversion_p
2728 (n->simdclone->args[i].orig_type,
2729 TREE_TYPE (gimple_call_arg (stmt, i))))
2730 i = -1;
2731 else if (arginfo[i].dt == vect_constant_def
2732 || arginfo[i].dt == vect_external_def
2733 || arginfo[i].linear_step)
2734 this_badness += 64;
2735 break;
2736 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2737 if (arginfo[i].dt != vect_constant_def
2738 && arginfo[i].dt != vect_external_def)
2739 i = -1;
2740 break;
2741 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2742 if (arginfo[i].dt == vect_constant_def
2743 || arginfo[i].dt == vect_external_def
2744 || (arginfo[i].linear_step
2745 != n->simdclone->args[i].linear_step))
2746 i = -1;
2747 break;
2748 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2749 /* FORNOW */
2750 i = -1;
2751 break;
2752 case SIMD_CLONE_ARG_TYPE_MASK:
2753 gcc_unreachable ();
2755 if (i == (size_t) -1)
2756 break;
2757 if (n->simdclone->args[i].alignment > arginfo[i].align)
2759 i = -1;
2760 break;
2762 if (arginfo[i].align)
2763 this_badness += (exact_log2 (arginfo[i].align)
2764 - exact_log2 (n->simdclone->args[i].alignment));
2766 if (i == (size_t) -1)
2767 continue;
2768 if (bestn == NULL || this_badness < badness)
2770 bestn = n;
2771 badness = this_badness;
2775 if (bestn == NULL)
2777 arginfo.release ();
2778 return false;
2781 for (i = 0; i < nargs; i++)
2782 if ((arginfo[i].dt == vect_constant_def
2783 || arginfo[i].dt == vect_external_def)
2784 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2786 arginfo[i].vectype
2787 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2788 i)));
2789 if (arginfo[i].vectype == NULL
2790 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2791 > bestn->simdclone->simdlen))
2793 arginfo.release ();
2794 return false;
2798 fndecl = bestn->decl;
2799 nunits = bestn->simdclone->simdlen;
2800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2802 /* If the function isn't const, only allow it in simd loops where the user
2803 has asserted that at least nunits consecutive iterations can be
2804 performed using SIMD instructions. */
2805 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2806 && gimple_vuse (stmt))
2808 arginfo.release ();
2809 return false;
2812 /* Sanity check: make sure that at least one copy of the vectorized stmt
2813 needs to be generated. */
2814 gcc_assert (ncopies >= 1);
2816 if (!vec_stmt) /* transformation not required. */
2818 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2819 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_NOTE, vect_location,
2822 "=== vectorizable_simd_clone_call ===\n");
2823 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2824 arginfo.release ();
2825 return true;
2828 /** Transform. **/
2830 if (dump_enabled_p ())
2831 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2833 /* Handle def. */
2834 scalar_dest = gimple_call_lhs (stmt);
2835 vec_dest = NULL_TREE;
2836 rtype = NULL_TREE;
2837 ratype = NULL_TREE;
2838 if (scalar_dest)
2840 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2841 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2842 if (TREE_CODE (rtype) == ARRAY_TYPE)
2844 ratype = rtype;
2845 rtype = TREE_TYPE (ratype);
2849 prev_stmt_info = NULL;
2850 for (j = 0; j < ncopies; ++j)
2852 /* Build argument list for the vectorized call. */
2853 if (j == 0)
2854 vargs.create (nargs);
2855 else
2856 vargs.truncate (0);
2858 for (i = 0; i < nargs; i++)
2860 unsigned int k, l, m, o;
2861 tree atype;
2862 op = gimple_call_arg (stmt, i);
2863 switch (bestn->simdclone->args[i].arg_type)
2865 case SIMD_CLONE_ARG_TYPE_VECTOR:
2866 atype = bestn->simdclone->args[i].vector_type;
2867 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
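/* O is how many clone-argument vectors one copy must pass for this operand.
   If the clone expects narrower vectors than the statement's own vector
   type, slices are extracted from each input def with BIT_FIELD_REF; if it
   expects wider ones, K consecutive input defs are glued together with a
   CONSTRUCTOR.  */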
2868 for (m = j * o; m < (j + 1) * o; m++)
2870 if (TYPE_VECTOR_SUBPARTS (atype)
2871 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2873 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2874 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2875 / TYPE_VECTOR_SUBPARTS (atype));
2876 gcc_assert ((k & (k - 1)) == 0);
2877 if (m == 0)
2878 vec_oprnd0
2879 = vect_get_vec_def_for_operand (op, stmt, NULL);
2880 else
2882 vec_oprnd0 = arginfo[i].op;
2883 if ((m & (k - 1)) == 0)
2884 vec_oprnd0
2885 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2886 vec_oprnd0);
2888 arginfo[i].op = vec_oprnd0;
2889 vec_oprnd0
2890 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2891 size_int (prec),
2892 bitsize_int ((m & (k - 1)) * prec));
2893 new_stmt
2894 = gimple_build_assign (make_ssa_name (atype, NULL),
2895 vec_oprnd0);
2896 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2897 vargs.safe_push (gimple_assign_lhs (new_stmt));
2899 else
2901 k = (TYPE_VECTOR_SUBPARTS (atype)
2902 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2903 gcc_assert ((k & (k - 1)) == 0);
2904 vec<constructor_elt, va_gc> *ctor_elts;
2905 if (k != 1)
2906 vec_alloc (ctor_elts, k);
2907 else
2908 ctor_elts = NULL;
2909 for (l = 0; l < k; l++)
2911 if (m == 0 && l == 0)
2912 vec_oprnd0
2913 = vect_get_vec_def_for_operand (op, stmt, NULL);
2914 else
2915 vec_oprnd0
2916 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2917 arginfo[i].op);
2918 arginfo[i].op = vec_oprnd0;
2919 if (k == 1)
2920 break;
2921 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2922 vec_oprnd0);
2924 if (k == 1)
2925 vargs.safe_push (vec_oprnd0);
2926 else
2928 vec_oprnd0 = build_constructor (atype, ctor_elts);
2929 new_stmt
2930 = gimple_build_assign (make_ssa_name (atype, NULL),
2931 vec_oprnd0);
2932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933 vargs.safe_push (gimple_assign_lhs (new_stmt));
2937 break;
2938 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2939 vargs.safe_push (op);
2940 break;
2941 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
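/* A linear argument becomes an induction variable of its own: on the first
   copy the base value is gimplified into the preheader and a PHI is created
   in the loop header that advances by LINEAR_STEP * NCOPIES * NUNITS per
   vector iteration; copy J > 0 simply adds J * NUNITS * LINEAR_STEP to that
   PHI result.  */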
2942 if (j == 0)
2944 gimple_seq stmts;
2945 arginfo[i].op
2946 = force_gimple_operand (arginfo[i].op, &stmts, true,
2947 NULL_TREE);
2948 if (stmts != NULL)
2950 basic_block new_bb;
2951 edge pe = loop_preheader_edge (loop);
2952 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2953 gcc_assert (!new_bb);
2955 tree phi_res = copy_ssa_name (op, NULL);
2956 gimple new_phi = create_phi_node (phi_res, loop->header);
2957 set_vinfo_for_stmt (new_phi,
2958 new_stmt_vec_info (new_phi, loop_vinfo,
2959 NULL));
2960 add_phi_arg (new_phi, arginfo[i].op,
2961 loop_preheader_edge (loop), UNKNOWN_LOCATION);
2962 enum tree_code code
2963 = POINTER_TYPE_P (TREE_TYPE (op))
2964 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2965 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2966 ? sizetype : TREE_TYPE (op);
2967 double_int cst
2968 = double_int::from_shwi
2969 (bestn->simdclone->args[i].linear_step);
2970 cst *= double_int::from_uhwi (ncopies * nunits);
2971 tree tcst = double_int_to_tree (type, cst);
2972 tree phi_arg = copy_ssa_name (op, NULL);
2973 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
2974 phi_res, tcst);
2975 gimple_stmt_iterator si = gsi_after_labels (loop->header);
2976 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
2977 set_vinfo_for_stmt (new_stmt,
2978 new_stmt_vec_info (new_stmt, loop_vinfo,
2979 NULL));
2980 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
2981 UNKNOWN_LOCATION);
2982 arginfo[i].op = phi_res;
2983 vargs.safe_push (phi_res);
2985 else
2987 enum tree_code code
2988 = POINTER_TYPE_P (TREE_TYPE (op))
2989 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2990 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2991 ? sizetype : TREE_TYPE (op);
2992 double_int cst
2993 = double_int::from_shwi
2994 (bestn->simdclone->args[i].linear_step);
2995 cst *= double_int::from_uhwi (j * nunits);
2996 tree tcst = double_int_to_tree (type, cst);
2997 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
2998 new_stmt
2999 = gimple_build_assign_with_ops (code, new_temp,
3000 arginfo[i].op, tcst);
3001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3002 vargs.safe_push (new_temp);
3004 break;
3005 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3006 default:
3007 gcc_unreachable ();
3011 new_stmt = gimple_build_call_vec (fndecl, vargs);
3012 if (vec_dest)
3014 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3015 if (ratype)
3016 new_temp = create_tmp_var (ratype, NULL);
3017 else if (TYPE_VECTOR_SUBPARTS (vectype)
3018 == TYPE_VECTOR_SUBPARTS (rtype))
3019 new_temp = make_ssa_name (vec_dest, new_stmt);
3020 else
3021 new_temp = make_ssa_name (rtype, new_stmt);
3022 gimple_call_set_lhs (new_stmt, new_temp);
3024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
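/* Distribute the clone's return value.  If it is wider than VECTYPE, it is
   split into pieces with BIT_FIELD_REF (or MEM_REF for an array return),
   one vectorized stmt per piece; if it is narrower, partial results are
   collected across copies into RET_CTOR_ELTS and a CONSTRUCTOR is emitted
   once a full group is available; an array return of matching width is
   simply read back through a MEM_REF.  Array temporaries are clobbered once
   read.  */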
3026 if (vec_dest)
3028 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3030 unsigned int k, l;
3031 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3032 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3033 gcc_assert ((k & (k - 1)) == 0);
3034 for (l = 0; l < k; l++)
3036 tree t;
3037 if (ratype)
3039 t = build_fold_addr_expr (new_temp);
3040 t = build2 (MEM_REF, vectype, t,
3041 build_int_cst (TREE_TYPE (t),
3042 l * prec / BITS_PER_UNIT));
3044 else
3045 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3046 size_int (prec), bitsize_int (l * prec));
3047 new_stmt
3048 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3049 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3050 if (j == 0 && l == 0)
3051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3052 else
3053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3055 prev_stmt_info = vinfo_for_stmt (new_stmt);
3058 if (ratype)
3060 tree clobber = build_constructor (ratype, NULL);
3061 TREE_THIS_VOLATILE (clobber) = 1;
3062 new_stmt = gimple_build_assign (new_temp, clobber);
3063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3065 continue;
3067 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3069 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3070 / TYPE_VECTOR_SUBPARTS (rtype));
3071 gcc_assert ((k & (k - 1)) == 0);
3072 if ((j & (k - 1)) == 0)
3073 vec_alloc (ret_ctor_elts, k);
3074 if (ratype)
3076 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3077 for (m = 0; m < o; m++)
3079 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3080 size_int (m), NULL_TREE, NULL_TREE);
3081 new_stmt
3082 = gimple_build_assign (make_ssa_name (rtype, NULL),
3083 tem);
3084 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3085 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3086 gimple_assign_lhs (new_stmt));
3088 tree clobber = build_constructor (ratype, NULL);
3089 TREE_THIS_VOLATILE (clobber) = 1;
3090 new_stmt = gimple_build_assign (new_temp, clobber);
3091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3093 else
3094 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3095 if ((j & (k - 1)) != k - 1)
3096 continue;
3097 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3098 new_stmt
3099 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3100 vec_oprnd0);
3101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3103 if ((unsigned) j == k - 1)
3104 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3105 else
3106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3108 prev_stmt_info = vinfo_for_stmt (new_stmt);
3109 continue;
3111 else if (ratype)
3113 tree t = build_fold_addr_expr (new_temp);
3114 t = build2 (MEM_REF, vectype, t,
3115 build_int_cst (TREE_TYPE (t), 0));
3116 new_stmt
3117 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3118 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3119 tree clobber = build_constructor (ratype, NULL);
3120 TREE_THIS_VOLATILE (clobber) = 1;
3121 vect_finish_stmt_generation (stmt,
3122 gimple_build_assign (new_temp,
3123 clobber), gsi);
3127 if (j == 0)
3128 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3129 else
3130 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3132 prev_stmt_info = vinfo_for_stmt (new_stmt);
3135 vargs.release ();
3137 /* The call in STMT might prevent it from being removed in dce.
3138 We, however, cannot remove it here because of the way the SSA name
3139 it defines is mapped to the new definition. So just replace the
3140 rhs of the statement with something harmless. */
3142 if (slp_node)
3143 return true;
3145 if (scalar_dest)
3147 type = TREE_TYPE (scalar_dest);
3148 if (is_pattern_stmt_p (stmt_info))
3149 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3150 else
3151 lhs = gimple_call_lhs (stmt);
3152 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3154 else
3155 new_stmt = gimple_build_nop ();
3156 set_vinfo_for_stmt (new_stmt, stmt_info);
3157 set_vinfo_for_stmt (stmt, NULL);
3158 STMT_VINFO_STMT (stmt_info) = new_stmt;
3159 gsi_replace (gsi, new_stmt, false);
3160 unlink_stmt_vdef (stmt);
3162 return true;
3166 /* Function vect_gen_widened_results_half
3168 Create a vector stmt whose code, number of arguments, and result
3169 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3170 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3171 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3172 needs to be created (DECL is a function-decl of a target-builtin).
3173 STMT is the original scalar stmt that we are vectorizing. */
3175 static gimple
3176 vect_gen_widened_results_half (enum tree_code code,
3177 tree decl,
3178 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3179 tree vec_dest, gimple_stmt_iterator *gsi,
3180 gimple stmt)
3182 gimple new_stmt;
3183 tree new_temp;
3185 /* Generate half of the widened result: */
3186 if (code == CALL_EXPR)
3188 /* Target specific support */
3189 if (op_type == binary_op)
3190 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3191 else
3192 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3193 new_temp = make_ssa_name (vec_dest, new_stmt);
3194 gimple_call_set_lhs (new_stmt, new_temp);
3196 else
3198 /* Generic support */
3199 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3200 if (op_type != binary_op)
3201 vec_oprnd1 = NULL;
3202 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3203 vec_oprnd1);
3204 new_temp = make_ssa_name (vec_dest, new_stmt);
3205 gimple_assign_set_lhs (new_stmt, new_temp);
3207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3209 return new_stmt;
3213 /* Get vectorized definitions for loop-based vectorization. For the first
3214 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3215 the scalar operand), and for the rest we get a copy with
3216 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3217 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3218 The vectors are collected into VEC_OPRNDS. */
3220 static void
3221 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3222 vec<tree> *vec_oprnds, int multi_step_cvt)
3224 tree vec_oprnd;
3226 /* Get first vector operand. */
3227 /* All the vector operands except the very first one (that is the scalar
3228 oprnd) are stmt copies. */
3229 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3230 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3231 else
3232 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3234 vec_oprnds->quick_push (vec_oprnd);
3236 /* Get second vector operand. */
3237 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3238 vec_oprnds->quick_push (vec_oprnd);
3240 *oprnd = vec_oprnd;
3242 /* For conversion in multiple steps, continue to get operands
3243 recursively. */
3244 if (multi_step_cvt)
3245 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3249 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3250 For multi-step conversions store the resulting vectors and call the function
3251 recursively. */
3253 static void
3254 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3255 int multi_step_cvt, gimple stmt,
3256 vec<tree> vec_dsts,
3257 gimple_stmt_iterator *gsi,
3258 slp_tree slp_node, enum tree_code code,
3259 stmt_vec_info *prev_stmt_info)
3261 unsigned int i;
3262 tree vop0, vop1, new_tmp, vec_dest;
3263 gimple new_stmt;
3264 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3266 vec_dest = vec_dsts.pop ();
3268 for (i = 0; i < vec_oprnds->length (); i += 2)
3270 /* Create demotion operation. */
3271 vop0 = (*vec_oprnds)[i];
3272 vop1 = (*vec_oprnds)[i + 1];
3273 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3274 new_tmp = make_ssa_name (vec_dest, new_stmt);
3275 gimple_assign_set_lhs (new_stmt, new_tmp);
3276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3278 if (multi_step_cvt)
3279 /* Store the resulting vector for next recursive call. */
3280 (*vec_oprnds)[i/2] = new_tmp;
3281 else
3283 /* This is the last step of the conversion sequence. Store the
3284 vectors in SLP_NODE or in the vector info of the scalar statement
3285 (or in the STMT_VINFO_RELATED_STMT chain). */
3286 if (slp_node)
3287 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3288 else
3290 if (!*prev_stmt_info)
3291 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3292 else
3293 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3295 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3300 /* For multi-step demotion operations we first generate demotion operations
3301 from the source type to the intermediate types, and then combine the
3302 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3303 type. */
3304 if (multi_step_cvt)
3306 /* At each level of recursion we have half of the operands we had at the
3307 previous level. */
3308 vec_oprnds->truncate ((i+1)/2);
3309 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3310 stmt, vec_dsts, gsi, slp_node,
3311 VEC_PACK_TRUNC_EXPR,
3312 prev_stmt_info);
3315 vec_dsts.quick_push (vec_dest);
3319 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3320 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3321 the resulting vectors and call the function recursively. */
3323 static void
3324 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3325 vec<tree> *vec_oprnds1,
3326 gimple stmt, tree vec_dest,
3327 gimple_stmt_iterator *gsi,
3328 enum tree_code code1,
3329 enum tree_code code2, tree decl1,
3330 tree decl2, int op_type)
3332 int i;
3333 tree vop0, vop1, new_tmp1, new_tmp2;
3334 gimple new_stmt1, new_stmt2;
3335 vec<tree> vec_tmp = vNULL;
3337 vec_tmp.create (vec_oprnds0->length () * 2);
3338 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3340 if (op_type == binary_op)
3341 vop1 = (*vec_oprnds1)[i];
3342 else
3343 vop1 = NULL_TREE;
3345 /* Generate the two halves of the promotion operation. */
3346 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3347 op_type, vec_dest, gsi, stmt);
3348 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3349 op_type, vec_dest, gsi, stmt);
3350 if (is_gimple_call (new_stmt1))
3352 new_tmp1 = gimple_call_lhs (new_stmt1);
3353 new_tmp2 = gimple_call_lhs (new_stmt2);
3355 else
3357 new_tmp1 = gimple_assign_lhs (new_stmt1);
3358 new_tmp2 = gimple_assign_lhs (new_stmt2);
3361 /* Store the results for the next step. */
3362 vec_tmp.quick_push (new_tmp1);
3363 vec_tmp.quick_push (new_tmp2);
3366 vec_oprnds0->release ();
3367 *vec_oprnds0 = vec_tmp;
3371 /* Check if STMT performs a conversion operation that can be vectorized.
3372 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3373 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3374 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3376 static bool
3377 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3378 gimple *vec_stmt, slp_tree slp_node)
3380 tree vec_dest;
3381 tree scalar_dest;
3382 tree op0, op1 = NULL_TREE;
3383 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3384 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3385 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3386 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3387 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3388 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3389 tree new_temp;
3390 tree def;
3391 gimple def_stmt;
3392 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3393 gimple new_stmt = NULL;
3394 stmt_vec_info prev_stmt_info;
3395 int nunits_in;
3396 int nunits_out;
3397 tree vectype_out, vectype_in;
3398 int ncopies, i, j;
3399 tree lhs_type, rhs_type;
3400 enum { NARROW, NONE, WIDEN } modifier;
3401 vec<tree> vec_oprnds0 = vNULL;
3402 vec<tree> vec_oprnds1 = vNULL;
3403 tree vop0;
3404 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3405 int multi_step_cvt = 0;
3406 vec<tree> vec_dsts = vNULL;
3407 vec<tree> interm_types = vNULL;
3408 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3409 int op_type;
3410 enum machine_mode rhs_mode;
3411 unsigned short fltsz;
3413 /* Is STMT a vectorizable conversion? */
3415 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3416 return false;
3418 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3419 return false;
3421 if (!is_gimple_assign (stmt))
3422 return false;
3424 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3425 return false;
3427 code = gimple_assign_rhs_code (stmt);
3428 if (!CONVERT_EXPR_CODE_P (code)
3429 && code != FIX_TRUNC_EXPR
3430 && code != FLOAT_EXPR
3431 && code != WIDEN_MULT_EXPR
3432 && code != WIDEN_LSHIFT_EXPR)
3433 return false;
3435 op_type = TREE_CODE_LENGTH (code);
3437 /* Check types of lhs and rhs. */
3438 scalar_dest = gimple_assign_lhs (stmt);
3439 lhs_type = TREE_TYPE (scalar_dest);
3440 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3442 op0 = gimple_assign_rhs1 (stmt);
3443 rhs_type = TREE_TYPE (op0);
3445 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3446 && !((INTEGRAL_TYPE_P (lhs_type)
3447 && INTEGRAL_TYPE_P (rhs_type))
3448 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3449 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3450 return false;
3452 if ((INTEGRAL_TYPE_P (lhs_type)
3453 && (TYPE_PRECISION (lhs_type)
3454 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3455 || (INTEGRAL_TYPE_P (rhs_type)
3456 && (TYPE_PRECISION (rhs_type)
3457 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3459 if (dump_enabled_p ())
3460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3461 "type conversion to/from bit-precision unsupported."
3462 "\n");
3463 return false;
3466 /* Check the operands of the operation. */
3467 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3468 &def_stmt, &def, &dt[0], &vectype_in))
3470 if (dump_enabled_p ())
3471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3472 "use not simple.\n");
3473 return false;
3475 if (op_type == binary_op)
3477 bool ok;
3479 op1 = gimple_assign_rhs2 (stmt);
3480 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3481 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3482 OP1. */
3483 if (CONSTANT_CLASS_P (op0))
3484 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3485 &def_stmt, &def, &dt[1], &vectype_in);
3486 else
3487 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3488 &def, &dt[1]);
3490 if (!ok)
3492 if (dump_enabled_p ())
3493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3494 "use not simple.\n");
3495 return false;
3499 /* If op0 is an external or constant def, use a vector type of
3500 the same size as the output vector type. */
3501 if (!vectype_in)
3502 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3503 if (vec_stmt)
3504 gcc_assert (vectype_in);
3505 if (!vectype_in)
3507 if (dump_enabled_p ())
3509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3510 "no vectype for scalar type ");
3511 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3512 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3515 return false;
3518 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3519 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3520 if (nunits_in < nunits_out)
3521 modifier = NARROW;
3522 else if (nunits_out == nunits_in)
3523 modifier = NONE;
3524 else
3525 modifier = WIDEN;
3527 /* Multiple types in SLP are handled by creating the appropriate number of
3528 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3529 case of SLP. */
3530 if (slp_node || PURE_SLP_STMT (stmt_info))
3531 ncopies = 1;
3532 else if (modifier == NARROW)
3533 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3534 else
3535 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3537 /* Sanity check: make sure that at least one copy of the vectorized stmt
3538 needs to be generated. */
3539 gcc_assert (ncopies >= 1);
3541 /* Supportable by target? */
3542 switch (modifier)
3544 case NONE:
3545 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3546 return false;
3547 if (supportable_convert_operation (code, vectype_out, vectype_in,
3548 &decl1, &code1))
3549 break;
3550 /* FALLTHRU */
3551 unsupported:
3552 if (dump_enabled_p ())
3553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3554 "conversion not supported by target.\n");
3555 return false;
3557 case WIDEN:
3558 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3559 &code1, &code2, &multi_step_cvt,
3560 &interm_types))
3562 /* Binary widening operation can only be supported directly by the
3563 architecture. */
3564 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3565 break;
3568 if (code != FLOAT_EXPR
3569 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3570 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3571 goto unsupported;
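/* An int-to-float widening that the target cannot do directly may still be
   done through an intermediate integer type: try successively wider integer
   modes up to the float's size, requiring a widening integer conversion
   from VECTYPE_IN to CVT_TYPE plus either a direct or a widening
   CVT_TYPE-to-float conversion for the last step.  The extra integer step
   is recorded in MULTI_STEP_CVT / INTERM_TYPES.  */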
3573 rhs_mode = TYPE_MODE (rhs_type);
3574 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3575 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3576 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3577 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3579 cvt_type
3580 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3581 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3582 if (cvt_type == NULL_TREE)
3583 goto unsupported;
3585 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3587 if (!supportable_convert_operation (code, vectype_out,
3588 cvt_type, &decl1, &codecvt1))
3589 goto unsupported;
3591 else if (!supportable_widening_operation (code, stmt, vectype_out,
3592 cvt_type, &codecvt1,
3593 &codecvt2, &multi_step_cvt,
3594 &interm_types))
3595 continue;
3596 else
3597 gcc_assert (multi_step_cvt == 0);
3599 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3600 vectype_in, &code1, &code2,
3601 &multi_step_cvt, &interm_types))
3602 break;
3605 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3606 goto unsupported;
3608 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3609 codecvt2 = ERROR_MARK;
3610 else
3612 multi_step_cvt++;
3613 interm_types.safe_push (cvt_type);
3614 cvt_type = NULL_TREE;
3616 break;
3618 case NARROW:
3619 gcc_assert (op_type == unary_op);
3620 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3621 &code1, &multi_step_cvt,
3622 &interm_types))
3623 break;
3625 if (code != FIX_TRUNC_EXPR
3626 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3627 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3628 goto unsupported;
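/* Likewise for a float-to-int narrowing the target lacks: convert to a
   same-width integer type (CVT_TYPE) first and then narrow that integer
   vector down to VECTYPE_OUT.  */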
3630 rhs_mode = TYPE_MODE (rhs_type);
3631 cvt_type
3632 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3633 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3634 if (cvt_type == NULL_TREE)
3635 goto unsupported;
3636 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3637 &decl1, &codecvt1))
3638 goto unsupported;
3639 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3640 &code1, &multi_step_cvt,
3641 &interm_types))
3642 break;
3643 goto unsupported;
3645 default:
3646 gcc_unreachable ();
3649 if (!vec_stmt) /* transformation not required. */
3651 if (dump_enabled_p ())
3652 dump_printf_loc (MSG_NOTE, vect_location,
3653 "=== vectorizable_conversion ===\n");
3654 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3656 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3657 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3659 else if (modifier == NARROW)
3661 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3662 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3664 else
3666 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3667 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3669 interm_types.release ();
3670 return true;
3673 /** Transform. **/
3674 if (dump_enabled_p ())
3675 dump_printf_loc (MSG_NOTE, vect_location,
3676 "transform conversion. ncopies = %d.\n", ncopies);
3678 if (op_type == binary_op)
3680 if (CONSTANT_CLASS_P (op0))
3681 op0 = fold_convert (TREE_TYPE (op1), op0);
3682 else if (CONSTANT_CLASS_P (op1))
3683 op1 = fold_convert (TREE_TYPE (op0), op1);
3686 /* In case of multi-step conversion, we first generate conversion operations
3687 to the intermediate types, and then from those types to the final one.
3688 We create vector destinations for the intermediate type (TYPES) received
3689 from supportable_*_operation, and store them in the correct order
3690 for future use in vect_create_vectorized_*_stmts (). */
3691 vec_dsts.create (multi_step_cvt + 1);
3692 vec_dest = vect_create_destination_var (scalar_dest,
3693 (cvt_type && modifier == WIDEN)
3694 ? cvt_type : vectype_out);
3695 vec_dsts.quick_push (vec_dest);
3697 if (multi_step_cvt)
3699 for (i = interm_types.length () - 1;
3700 interm_types.iterate (i, &intermediate_type); i--)
3702 vec_dest = vect_create_destination_var (scalar_dest,
3703 intermediate_type);
3704 vec_dsts.quick_push (vec_dest);
3708 if (cvt_type)
3709 vec_dest = vect_create_destination_var (scalar_dest,
3710 modifier == WIDEN
3711 ? vectype_out : cvt_type);
3713 if (!slp_node)
3715 if (modifier == WIDEN)
3717 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3718 if (op_type == binary_op)
3719 vec_oprnds1.create (1);
3721 else if (modifier == NARROW)
3722 vec_oprnds0.create (
3723 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3725 else if (code == WIDEN_LSHIFT_EXPR)
3726 vec_oprnds1.create (slp_node->vec_stmts_size);
3728 last_oprnd = op0;
3729 prev_stmt_info = NULL;
3730 switch (modifier)
3732 case NONE:
3733 for (j = 0; j < ncopies; j++)
3735 if (j == 0)
3736 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3737 -1);
3738 else
3739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3741 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3743 /* Arguments are ready, create the new vector stmt. */
3744 if (code1 == CALL_EXPR)
3746 new_stmt = gimple_build_call (decl1, 1, vop0);
3747 new_temp = make_ssa_name (vec_dest, new_stmt);
3748 gimple_call_set_lhs (new_stmt, new_temp);
3750 else
3752 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3753 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3754 vop0, NULL);
3755 new_temp = make_ssa_name (vec_dest, new_stmt);
3756 gimple_assign_set_lhs (new_stmt, new_temp);
3759 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3760 if (slp_node)
3761 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3764 if (j == 0)
3765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3766 else
3767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3768 prev_stmt_info = vinfo_for_stmt (new_stmt);
3770 break;
3772 case WIDEN:
3773 /* In case the vectorization factor (VF) is bigger than the number
3774 of elements that we can fit in a vectype (nunits), we have to
3775 generate more than one vector stmt, i.e., we need to "unroll"
3776 the vector stmt by a factor of VF/nunits. */
3777 for (j = 0; j < ncopies; j++)
3779 /* Handle uses. */
3780 if (j == 0)
3782 if (slp_node)
3784 if (code == WIDEN_LSHIFT_EXPR)
3786 unsigned int k;
3788 vec_oprnd1 = op1;
3789 /* Store vec_oprnd1 for every vector stmt to be created
3790 for SLP_NODE. We check during the analysis that all
3791 the shift arguments are the same. */
3792 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3793 vec_oprnds1.quick_push (vec_oprnd1);
3795 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3796 slp_node, -1);
3798 else
3799 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3800 &vec_oprnds1, slp_node, -1);
3802 else
3804 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3805 vec_oprnds0.quick_push (vec_oprnd0);
3806 if (op_type == binary_op)
3808 if (code == WIDEN_LSHIFT_EXPR)
3809 vec_oprnd1 = op1;
3810 else
3811 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3812 NULL);
3813 vec_oprnds1.quick_push (vec_oprnd1);
3817 else
3819 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3820 vec_oprnds0.truncate (0);
3821 vec_oprnds0.quick_push (vec_oprnd0);
3822 if (op_type == binary_op)
3824 if (code == WIDEN_LSHIFT_EXPR)
3825 vec_oprnd1 = op1;
3826 else
3827 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3828 vec_oprnd1);
3829 vec_oprnds1.truncate (0);
3830 vec_oprnds1.quick_push (vec_oprnd1);
3834 /* Arguments are ready. Create the new vector stmts. */
3835 for (i = multi_step_cvt; i >= 0; i--)
3837 tree this_dest = vec_dsts[i];
3838 enum tree_code c1 = code1, c2 = code2;
3839 if (i == 0 && codecvt2 != ERROR_MARK)
3841 c1 = codecvt1;
3842 c2 = codecvt2;
3844 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3845 &vec_oprnds1,
3846 stmt, this_dest, gsi,
3847 c1, c2, decl1, decl2,
3848 op_type);
3851 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3853 if (cvt_type)
3855 if (codecvt1 == CALL_EXPR)
3857 new_stmt = gimple_build_call (decl1, 1, vop0);
3858 new_temp = make_ssa_name (vec_dest, new_stmt);
3859 gimple_call_set_lhs (new_stmt, new_temp);
3861 else
3863 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3864 new_temp = make_ssa_name (vec_dest, NULL);
3865 new_stmt = gimple_build_assign_with_ops (codecvt1,
3866 new_temp,
3867 vop0, NULL);
3870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3872 else
3873 new_stmt = SSA_NAME_DEF_STMT (vop0);
3875 if (slp_node)
3876 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3877 else
3879 if (!prev_stmt_info)
3880 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3881 else
3882 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3883 prev_stmt_info = vinfo_for_stmt (new_stmt);
3888 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3889 break;
3891 case NARROW:
3892 /* In case the vectorization factor (VF) is bigger than the number
3893 of elements that we can fit in a vectype (nunits), we have to
3894 generate more than one vector stmt, i.e., we need to "unroll"
3895 the vector stmt by a factor VF/nunits. */
3896 for (j = 0; j < ncopies; j++)
3898 /* Handle uses. */
3899 if (slp_node)
3900 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3901 slp_node, -1);
3902 else
3904 vec_oprnds0.truncate (0);
3905 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3906 vect_pow2 (multi_step_cvt) - 1);
3909 /* Arguments are ready. Create the new vector stmts. */
3910 if (cvt_type)
3911 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3913 if (codecvt1 == CALL_EXPR)
3915 new_stmt = gimple_build_call (decl1, 1, vop0);
3916 new_temp = make_ssa_name (vec_dest, new_stmt);
3917 gimple_call_set_lhs (new_stmt, new_temp);
3919 else
3921 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3922 new_temp = make_ssa_name (vec_dest, NULL);
3923 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3924 vop0, NULL);
3927 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3928 vec_oprnds0[i] = new_temp;
3931 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3932 stmt, vec_dsts, gsi,
3933 slp_node, code1,
3934 &prev_stmt_info);
3937 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3938 break;
3941 vec_oprnds0.release ();
3942 vec_oprnds1.release ();
3943 vec_dsts.release ();
3944 interm_types.release ();
3946 return true;
3950 /* Function vectorizable_assignment.
3952 Check if STMT performs an assignment (copy) that can be vectorized.
3953 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3954 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3955 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3957 static bool
3958 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3959 gimple *vec_stmt, slp_tree slp_node)
3961 tree vec_dest;
3962 tree scalar_dest;
3963 tree op;
3964 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3965 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3967 tree new_temp;
3968 tree def;
3969 gimple def_stmt;
3970 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3971 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3972 int ncopies;
3973 int i, j;
3974 vec<tree> vec_oprnds = vNULL;
3975 tree vop;
3976 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3977 gimple new_stmt = NULL;
3978 stmt_vec_info prev_stmt_info = NULL;
3979 enum tree_code code;
3980 tree vectype_in;
3982 /* Multiple types in SLP are handled by creating the appropriate number of
3983 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3984 case of SLP. */
3985 if (slp_node || PURE_SLP_STMT (stmt_info))
3986 ncopies = 1;
3987 else
3988 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3990 gcc_assert (ncopies >= 1);
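/* For example, with a vectorization factor of 8 and a four-element
   vectype (nunits == 4), two copies of the vector statement are
   generated (ncopies == 2).  */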
3992 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3993 return false;
3995 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3996 return false;
3998 /* Is vectorizable assignment? */
3999 if (!is_gimple_assign (stmt))
4000 return false;
4002 scalar_dest = gimple_assign_lhs (stmt);
4003 if (TREE_CODE (scalar_dest) != SSA_NAME)
4004 return false;
4006 code = gimple_assign_rhs_code (stmt);
4007 if (gimple_assign_single_p (stmt)
4008 || code == PAREN_EXPR
4009 || CONVERT_EXPR_CODE_P (code))
4010 op = gimple_assign_rhs1 (stmt);
4011 else
4012 return false;
4014 if (code == VIEW_CONVERT_EXPR)
4015 op = TREE_OPERAND (op, 0);
4017 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4018 &def_stmt, &def, &dt[0], &vectype_in))
4020 if (dump_enabled_p ())
4021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4022 "use not simple.\n");
4023 return false;
4026 /* We can handle NOP_EXPR conversions that do not change the number
4027 of elements or the vector size. */
4028 if ((CONVERT_EXPR_CODE_P (code)
4029 || code == VIEW_CONVERT_EXPR)
4030 && (!vectype_in
4031 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4032 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4033 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4034 return false;
4036 /* We do not handle bit-precision changes. */
4037 if ((CONVERT_EXPR_CODE_P (code)
4038 || code == VIEW_CONVERT_EXPR)
4039 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4040 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4041 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4042 || ((TYPE_PRECISION (TREE_TYPE (op))
4043 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4044 /* But a conversion that does not change the bit-pattern is ok. */
4045 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4046 > TYPE_PRECISION (TREE_TYPE (op)))
4047 && TYPE_UNSIGNED (TREE_TYPE (op))))
4049 if (dump_enabled_p ())
4050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4051 "type conversion to/from bit-precision "
4052 "unsupported.\n");
4053 return false;
4056 if (!vec_stmt) /* transformation not required. */
4058 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4059 if (dump_enabled_p ())
4060 dump_printf_loc (MSG_NOTE, vect_location,
4061 "=== vectorizable_assignment ===\n");
4062 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4063 return true;
4066 /** Transform. **/
4067 if (dump_enabled_p ())
4068 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4070 /* Handle def. */
4071 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4073 /* Handle use. */
4074 for (j = 0; j < ncopies; j++)
4076 /* Handle uses. */
4077 if (j == 0)
4078 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4079 else
4080 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4082 /* Arguments are ready. create the new vector stmt. */
4083 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4085 if (CONVERT_EXPR_CODE_P (code)
4086 || code == VIEW_CONVERT_EXPR)
4087 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4088 new_stmt = gimple_build_assign (vec_dest, vop);
4089 new_temp = make_ssa_name (vec_dest, new_stmt);
4090 gimple_assign_set_lhs (new_stmt, new_temp);
4091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4092 if (slp_node)
4093 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4096 if (slp_node)
4097 continue;
4099 if (j == 0)
4100 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4101 else
4102 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4104 prev_stmt_info = vinfo_for_stmt (new_stmt);
4107 vec_oprnds.release ();
4108 return true;
4112 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4113 either as shift by a scalar or by a vector. */
4115 bool
4116 vect_supportable_shift (enum tree_code code, tree scalar_type)
4119 enum machine_mode vec_mode;
4120 optab optab;
4121 int icode;
4122 tree vectype;
4124 vectype = get_vectype_for_scalar_type (scalar_type);
4125 if (!vectype)
4126 return false;
4128 optab = optab_for_tree_code (code, vectype, optab_scalar);
4129 if (!optab
4130 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4132 optab = optab_for_tree_code (code, vectype, optab_vector);
4133 if (!optab
4134 || (optab_handler (optab, TYPE_MODE (vectype))
4135 == CODE_FOR_nothing))
4136 return false;
4139 vec_mode = TYPE_MODE (vectype);
4140 icode = (int) optab_handler (optab, vec_mode);
4141 if (icode == CODE_FOR_nothing)
4142 return false;
4144 return true;
4148 /* Function vectorizable_shift.
4150 Check if STMT performs a shift operation that can be vectorized.
4151 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4152 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4153 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4155 static bool
4156 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4157 gimple *vec_stmt, slp_tree slp_node)
4159 tree vec_dest;
4160 tree scalar_dest;
4161 tree op0, op1 = NULL;
4162 tree vec_oprnd1 = NULL_TREE;
4163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4164 tree vectype;
4165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4166 enum tree_code code;
4167 enum machine_mode vec_mode;
4168 tree new_temp;
4169 optab optab;
4170 int icode;
4171 enum machine_mode optab_op2_mode;
4172 tree def;
4173 gimple def_stmt;
4174 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4175 gimple new_stmt = NULL;
4176 stmt_vec_info prev_stmt_info;
4177 int nunits_in;
4178 int nunits_out;
4179 tree vectype_out;
4180 tree op1_vectype;
4181 int ncopies;
4182 int j, i;
4183 vec<tree> vec_oprnds0 = vNULL;
4184 vec<tree> vec_oprnds1 = vNULL;
4185 tree vop0, vop1;
4186 unsigned int k;
4187 bool scalar_shift_arg = true;
4188 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4189 int vf;
4191 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4192 return false;
4194 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4195 return false;
4197 /* Is STMT a vectorizable binary/unary operation? */
4198 if (!is_gimple_assign (stmt))
4199 return false;
4201 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4202 return false;
4204 code = gimple_assign_rhs_code (stmt);
4206 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4207 || code == RROTATE_EXPR))
4208 return false;
4210 scalar_dest = gimple_assign_lhs (stmt);
4211 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4212 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4213 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4215 if (dump_enabled_p ())
4216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4217 "bit-precision shifts not supported.\n");
4218 return false;
4221 op0 = gimple_assign_rhs1 (stmt);
4222 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4223 &def_stmt, &def, &dt[0], &vectype))
4225 if (dump_enabled_p ())
4226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4227 "use not simple.\n");
4228 return false;
4230 /* If op0 is an external or constant def, use a vector type with
4231 the same size as the output vector type. */
4232 if (!vectype)
4233 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4234 if (vec_stmt)
4235 gcc_assert (vectype);
4236 if (!vectype)
4238 if (dump_enabled_p ())
4239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4240 "no vectype for scalar type\n");
4241 return false;
4244 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4245 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4246 if (nunits_out != nunits_in)
4247 return false;
4249 op1 = gimple_assign_rhs2 (stmt);
4250 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4251 &def, &dt[1], &op1_vectype))
4253 if (dump_enabled_p ())
4254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4255 "use not simple.\n");
4256 return false;
4259 if (loop_vinfo)
4260 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4261 else
4262 vf = 1;
4264 /* Multiple types in SLP are handled by creating the appropriate number of
4265 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4266 case of SLP. */
4267 if (slp_node || PURE_SLP_STMT (stmt_info))
4268 ncopies = 1;
4269 else
4270 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4272 gcc_assert (ncopies >= 1);
4274 /* Determine whether the shift amount is a vector or a scalar. If the
4275 shift/rotate amount is a vector, use the vector/vector shift optabs. */
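/* For example, in  a[i] = b[i] << 2  the shift amount is a loop
   invariant scalar, while in  a[i] = b[i] << c[i]  it is defined
   inside the loop (vect_internal_def) and a vector/vector shift
   is required.  */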
4277 if (dt[1] == vect_internal_def && !slp_node)
4278 scalar_shift_arg = false;
4279 else if (dt[1] == vect_constant_def
4280 || dt[1] == vect_external_def
4281 || dt[1] == vect_internal_def)
4283 /* In SLP, we need to check whether the shift count is the same;
4284 in loops, if it is a constant or invariant, it is always
4285 a scalar shift. */
4286 if (slp_node)
4288 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4289 gimple slpstmt;
4291 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4292 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4293 scalar_shift_arg = false;
4296 else
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "operand mode requires invariant argument.\n");
4301 return false;
4304 /* Vector shifted by vector. */
4305 if (!scalar_shift_arg)
4307 optab = optab_for_tree_code (code, vectype, optab_vector);
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE, vect_location,
4310 "vector/vector shift/rotate found.\n");
4312 if (!op1_vectype)
4313 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4314 if (op1_vectype == NULL_TREE
4315 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4319 "unusable type for last operand in"
4320 " vector/vector shift/rotate.\n");
4321 return false;
4324 /* See if the machine has a vector shifted by scalar insn and, if not,
4325 see if it has a vector shifted by vector insn. */
4326 else
4328 optab = optab_for_tree_code (code, vectype, optab_scalar);
4329 if (optab
4330 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4332 if (dump_enabled_p ())
4333 dump_printf_loc (MSG_NOTE, vect_location,
4334 "vector/scalar shift/rotate found.\n");
4336 else
4338 optab = optab_for_tree_code (code, vectype, optab_vector);
4339 if (optab
4340 && (optab_handler (optab, TYPE_MODE (vectype))
4341 != CODE_FOR_nothing))
4343 scalar_shift_arg = false;
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_NOTE, vect_location,
4347 "vector/vector shift/rotate found.\n");
4349 /* Unlike the other binary operators, shifts/rotates have
4350 the rhs being int, instead of the same type as the lhs,
4351 so make sure the scalar is the right type if we are
4352 dealing with vectors of long long/long/short/char. */
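/* For example, a vector of shorts shifted by the int constant 3
   needs the constant converted to short here so that it matches
   the vector element type.  */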
4353 if (dt[1] == vect_constant_def)
4354 op1 = fold_convert (TREE_TYPE (vectype), op1);
4355 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4356 TREE_TYPE (op1)))
4358 if (slp_node
4359 && TYPE_MODE (TREE_TYPE (vectype))
4360 != TYPE_MODE (TREE_TYPE (op1)))
4362 if (dump_enabled_p ())
4363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4364 "unusable type for last operand in"
4365 " vector/vector shift/rotate.\n");
4366 return false;
4368 if (vec_stmt && !slp_node)
4370 op1 = fold_convert (TREE_TYPE (vectype), op1);
4371 op1 = vect_init_vector (stmt, op1,
4372 TREE_TYPE (vectype), NULL);
4379 /* Supportable by target? */
4380 if (!optab)
4382 if (dump_enabled_p ())
4383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4384 "no optab.\n");
4385 return false;
4387 vec_mode = TYPE_MODE (vectype);
4388 icode = (int) optab_handler (optab, vec_mode);
4389 if (icode == CODE_FOR_nothing)
4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4393 "op not supported by target.\n");
4394 /* Check only during analysis. */
4395 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4396 || (vf < vect_min_worthwhile_factor (code)
4397 && !vec_stmt))
4398 return false;
4399 if (dump_enabled_p ())
4400 dump_printf_loc (MSG_NOTE, vect_location,
4401 "proceeding using word mode.\n");
4404 /* Worthwhile without SIMD support? Check only during analysis. */
4405 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4406 && vf < vect_min_worthwhile_factor (code)
4407 && !vec_stmt)
4409 if (dump_enabled_p ())
4410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4411 "not worthwhile without SIMD support.\n");
4412 return false;
4415 if (!vec_stmt) /* transformation not required. */
4417 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4418 if (dump_enabled_p ())
4419 dump_printf_loc (MSG_NOTE, vect_location,
4420 "=== vectorizable_shift ===\n");
4421 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4422 return true;
4425 /** Transform. **/
4427 if (dump_enabled_p ())
4428 dump_printf_loc (MSG_NOTE, vect_location,
4429 "transform binary/unary operation.\n");
4431 /* Handle def. */
4432 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4434 prev_stmt_info = NULL;
4435 for (j = 0; j < ncopies; j++)
4437 /* Handle uses. */
4438 if (j == 0)
4440 if (scalar_shift_arg)
4442 /* Vector shl and shr insn patterns can be defined with scalar
4443 operand 2 (shift operand). In this case, use constant or loop
4444 invariant op1 directly, without extending it to vector mode
4445 first. */
4446 optab_op2_mode = insn_data[icode].operand[2].mode;
4447 if (!VECTOR_MODE_P (optab_op2_mode))
4449 if (dump_enabled_p ())
4450 dump_printf_loc (MSG_NOTE, vect_location,
4451 "operand 1 using scalar mode.\n");
4452 vec_oprnd1 = op1;
4453 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4454 vec_oprnds1.quick_push (vec_oprnd1);
4455 if (slp_node)
4457 /* Store vec_oprnd1 for every vector stmt to be created
4458 for SLP_NODE. We check during the analysis that all
4459 the shift arguments are the same.
4460 TODO: Allow different constants for different vector
4461 stmts generated for an SLP instance. */
4462 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4463 vec_oprnds1.quick_push (vec_oprnd1);
4468 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4469 (a special case for certain kinds of vector shifts); otherwise,
4470 operand 1 should be of a vector type (the usual case). */
4471 if (vec_oprnd1)
4472 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4473 slp_node, -1);
4474 else
4475 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4476 slp_node, -1);
4478 else
4479 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4481 /* Arguments are ready. Create the new vector stmt. */
4482 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4484 vop1 = vec_oprnds1[i];
4485 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4486 new_temp = make_ssa_name (vec_dest, new_stmt);
4487 gimple_assign_set_lhs (new_stmt, new_temp);
4488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4489 if (slp_node)
4490 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4493 if (slp_node)
4494 continue;
4496 if (j == 0)
4497 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4498 else
4499 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4500 prev_stmt_info = vinfo_for_stmt (new_stmt);
4503 vec_oprnds0.release ();
4504 vec_oprnds1.release ();
4506 return true;
4510 /* Function vectorizable_operation.
4512 Check if STMT performs a binary, unary or ternary operation that can
4513 be vectorized.
4514 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4515 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4516 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4518 static bool
4519 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4520 gimple *vec_stmt, slp_tree slp_node)
4522 tree vec_dest;
4523 tree scalar_dest;
4524 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4526 tree vectype;
4527 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4528 enum tree_code code;
4529 enum machine_mode vec_mode;
4530 tree new_temp;
4531 int op_type;
4532 optab optab;
4533 int icode;
4534 tree def;
4535 gimple def_stmt;
4536 enum vect_def_type dt[3]
4537 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4538 gimple new_stmt = NULL;
4539 stmt_vec_info prev_stmt_info;
4540 int nunits_in;
4541 int nunits_out;
4542 tree vectype_out;
4543 int ncopies;
4544 int j, i;
4545 vec<tree> vec_oprnds0 = vNULL;
4546 vec<tree> vec_oprnds1 = vNULL;
4547 vec<tree> vec_oprnds2 = vNULL;
4548 tree vop0, vop1, vop2;
4549 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4550 int vf;
4552 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4553 return false;
4555 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4556 return false;
4558 /* Is STMT a vectorizable binary/unary operation? */
4559 if (!is_gimple_assign (stmt))
4560 return false;
4562 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4563 return false;
4565 code = gimple_assign_rhs_code (stmt);
4567 /* For pointer addition, we should use the normal plus for
4568 the vector addition. */
4569 if (code == POINTER_PLUS_EXPR)
4570 code = PLUS_EXPR;
4572 /* Support only unary, binary or ternary operations. */
4573 op_type = TREE_CODE_LENGTH (code);
4574 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4576 if (dump_enabled_p ())
4577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4578 "num. args = %d (not unary/binary/ternary op).\n",
4579 op_type);
4580 return false;
4583 scalar_dest = gimple_assign_lhs (stmt);
4584 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4586 /* Most operations cannot handle bit-precision types without extra
4587 truncations. */
4588 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4589 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4590 /* Exceptions are bitwise binary operations. */
4591 && code != BIT_IOR_EXPR
4592 && code != BIT_XOR_EXPR
4593 && code != BIT_AND_EXPR)
4595 if (dump_enabled_p ())
4596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4597 "bit-precision arithmetic not supported.\n");
4598 return false;
4601 op0 = gimple_assign_rhs1 (stmt);
4602 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4603 &def_stmt, &def, &dt[0], &vectype))
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4607 "use not simple.\n");
4608 return false;
4610 /* If op0 is an external or constant def, use a vector type with
4611 the same size as the output vector type. */
4612 if (!vectype)
4613 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4614 if (vec_stmt)
4615 gcc_assert (vectype);
4616 if (!vectype)
4618 if (dump_enabled_p ())
4620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4621 "no vectype for scalar type ");
4622 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4623 TREE_TYPE (op0));
4624 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4627 return false;
4630 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4631 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4632 if (nunits_out != nunits_in)
4633 return false;
4635 if (op_type == binary_op || op_type == ternary_op)
4637 op1 = gimple_assign_rhs2 (stmt);
4638 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4639 &def, &dt[1]))
4641 if (dump_enabled_p ())
4642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4643 "use not simple.\n");
4644 return false;
4647 if (op_type == ternary_op)
4649 op2 = gimple_assign_rhs3 (stmt);
4650 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4651 &def, &dt[2]))
4653 if (dump_enabled_p ())
4654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4655 "use not simple.\n");
4656 return false;
4660 if (loop_vinfo)
4661 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4662 else
4663 vf = 1;
4665 /* Multiple types in SLP are handled by creating the appropriate number of
4666 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4667 case of SLP. */
4668 if (slp_node || PURE_SLP_STMT (stmt_info))
4669 ncopies = 1;
4670 else
4671 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4673 gcc_assert (ncopies >= 1);
4675 /* Shifts are handled in vectorizable_shift (). */
4676 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4677 || code == RROTATE_EXPR)
4678 return false;
4680 /* Supportable by target? */
4682 vec_mode = TYPE_MODE (vectype);
4683 if (code == MULT_HIGHPART_EXPR)
4685 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4686 icode = LAST_INSN_CODE;
4687 else
4688 icode = CODE_FOR_nothing;
4690 else
4692 optab = optab_for_tree_code (code, vectype, optab_default);
4693 if (!optab)
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "no optab.\n");
4698 return false;
4700 icode = (int) optab_handler (optab, vec_mode);
4703 if (icode == CODE_FOR_nothing)
4705 if (dump_enabled_p ())
4706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4707 "op not supported by target.\n");
4708 /* Check only during analysis. */
4709 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4710 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4711 return false;
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_NOTE, vect_location,
4714 "proceeding using word mode.\n");
4717 /* Worthwhile without SIMD support? Check only during analysis. */
4718 if (!VECTOR_MODE_P (vec_mode)
4719 && !vec_stmt
4720 && vf < vect_min_worthwhile_factor (code))
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "not worthwhile without SIMD support.\n");
4725 return false;
4728 if (!vec_stmt) /* transformation not required. */
4730 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_NOTE, vect_location,
4733 "=== vectorizable_operation ===\n");
4734 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4735 return true;
4738 /** Transform. **/
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_NOTE, vect_location,
4742 "transform binary/unary operation.\n");
4744 /* Handle def. */
4745 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4747 /* In case the vectorization factor (VF) is bigger than the number
4748 of elements that we can fit in a vectype (nunits), we have to generate
4749 more than one vector stmt, i.e., we need to "unroll" the
4750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4751 from one copy of the vector stmt to the next, in the field
4752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4753 stages to find the correct vector defs to be used when vectorizing
4754 stmts that use the defs of the current stmt. The example below
4755 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4756 we need to create 4 vectorized stmts):
4758 before vectorization:
4759 RELATED_STMT VEC_STMT
4760 S1: x = memref - -
4761 S2: z = x + 1 - -
4763 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4764 there):
4765 RELATED_STMT VEC_STMT
4766 VS1_0: vx0 = memref0 VS1_1 -
4767 VS1_1: vx1 = memref1 VS1_2 -
4768 VS1_2: vx2 = memref2 VS1_3 -
4769 VS1_3: vx3 = memref3 - -
4770 S1: x = load - VS1_0
4771 S2: z = x + 1 - -
4773 step2: vectorize stmt S2 (done here):
4774 To vectorize stmt S2 we first need to find the relevant vector
4775 def for the first operand 'x'. This is, as usual, obtained from
4776 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4777 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4778 relevant vector def 'vx0'. Having found 'vx0' we can generate
4779 the vector stmt VS2_0, and as usual, record it in the
4780 STMT_VINFO_VEC_STMT of stmt S2.
4781 When creating the second copy (VS2_1), we obtain the relevant vector
4782 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4783 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4784 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4785 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4786 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4787 chain of stmts and pointers:
4788 RELATED_STMT VEC_STMT
4789 VS1_0: vx0 = memref0 VS1_1 -
4790 VS1_1: vx1 = memref1 VS1_2 -
4791 VS1_2: vx2 = memref2 VS1_3 -
4792 VS1_3: vx3 = memref3 - -
4793 S1: x = load - VS1_0
4794 VS2_0: vz0 = vx0 + v1 VS2_1 -
4795 VS2_1: vz1 = vx1 + v1 VS2_2 -
4796 VS2_2: vz2 = vx2 + v1 VS2_3 -
4797 VS2_3: vz3 = vx3 + v1 - -
4798 S2: z = x + 1 - VS2_0 */
4800 prev_stmt_info = NULL;
4801 for (j = 0; j < ncopies; j++)
4803 /* Handle uses. */
4804 if (j == 0)
4806 if (op_type == binary_op || op_type == ternary_op)
4807 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4808 slp_node, -1);
4809 else
4810 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4811 slp_node, -1);
4812 if (op_type == ternary_op)
4814 vec_oprnds2.create (1);
4815 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4816 stmt,
4817 NULL));
4820 else
4822 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4823 if (op_type == ternary_op)
4825 tree vec_oprnd = vec_oprnds2.pop ();
4826 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4827 vec_oprnd));
4831 /* Arguments are ready. Create the new vector stmt. */
4832 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4834 vop1 = ((op_type == binary_op || op_type == ternary_op)
4835 ? vec_oprnds1[i] : NULL_TREE);
4836 vop2 = ((op_type == ternary_op)
4837 ? vec_oprnds2[i] : NULL_TREE);
4838 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4839 vop0, vop1, vop2);
4840 new_temp = make_ssa_name (vec_dest, new_stmt);
4841 gimple_assign_set_lhs (new_stmt, new_temp);
4842 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4843 if (slp_node)
4844 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4847 if (slp_node)
4848 continue;
4850 if (j == 0)
4851 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4852 else
4853 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4854 prev_stmt_info = vinfo_for_stmt (new_stmt);
4857 vec_oprnds0.release ();
4858 vec_oprnds1.release ();
4859 vec_oprnds2.release ();
4861 return true;
4864 /* A helper function to ensure data reference DR's base alignment
4865 for STMT_INFO. */
4867 static void
4868 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4870 if (!dr->aux)
4871 return;
4873 if (((dataref_aux *)dr->aux)->base_misaligned)
4875 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4876 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4878 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4879 DECL_USER_ALIGN (base_decl) = 1;
4880 ((dataref_aux *)dr->aux)->base_misaligned = false;
4885 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4886 reversal of the vector elements. If that is impossible to do,
4887 returns NULL. */
4889 static tree
4890 perm_mask_for_reverse (tree vectype)
4892 int i, nunits;
4893 unsigned char *sel;
4895 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4896 sel = XALLOCAVEC (unsigned char, nunits);
4898 for (i = 0; i < nunits; ++i)
4899 sel[i] = nunits - 1 - i;
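/* For example, for a four-element vector the selector is {3, 2, 1, 0}.  */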
4901 return vect_gen_perm_mask (vectype, sel);
4904 /* Function vectorizable_store.
4906 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4907 can be vectorized.
4908 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4909 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4910 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4912 static bool
4913 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4914 slp_tree slp_node)
4916 tree scalar_dest;
4917 tree data_ref;
4918 tree op;
4919 tree vec_oprnd = NULL_TREE;
4920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4921 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4922 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4923 tree elem_type;
4924 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4925 struct loop *loop = NULL;
4926 enum machine_mode vec_mode;
4927 tree dummy;
4928 enum dr_alignment_support alignment_support_scheme;
4929 tree def;
4930 gimple def_stmt;
4931 enum vect_def_type dt;
4932 stmt_vec_info prev_stmt_info = NULL;
4933 tree dataref_ptr = NULL_TREE;
4934 tree dataref_offset = NULL_TREE;
4935 gimple ptr_incr = NULL;
4936 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4937 int ncopies;
4938 int j;
4939 gimple next_stmt, first_stmt = NULL;
4940 bool grouped_store = false;
4941 bool store_lanes_p = false;
4942 unsigned int group_size, i;
4943 vec<tree> dr_chain = vNULL;
4944 vec<tree> oprnds = vNULL;
4945 vec<tree> result_chain = vNULL;
4946 bool inv_p;
4947 bool negative = false;
4948 tree offset = NULL_TREE;
4949 vec<tree> vec_oprnds = vNULL;
4950 bool slp = (slp_node != NULL);
4951 unsigned int vec_num;
4952 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4953 tree aggr_type;
4955 if (loop_vinfo)
4956 loop = LOOP_VINFO_LOOP (loop_vinfo);
4958 /* Multiple types in SLP are handled by creating the appropriate number of
4959 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4960 case of SLP. */
4961 if (slp || PURE_SLP_STMT (stmt_info))
4962 ncopies = 1;
4963 else
4964 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4966 gcc_assert (ncopies >= 1);
4968 /* FORNOW. This restriction should be relaxed. */
4969 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4971 if (dump_enabled_p ())
4972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4973 "multiple types in nested loop.\n");
4974 return false;
4977 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4978 return false;
4980 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4981 return false;
4983 /* Is vectorizable store? */
4985 if (!is_gimple_assign (stmt))
4986 return false;
4988 scalar_dest = gimple_assign_lhs (stmt);
4989 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
4990 && is_pattern_stmt_p (stmt_info))
4991 scalar_dest = TREE_OPERAND (scalar_dest, 0);
4992 if (TREE_CODE (scalar_dest) != ARRAY_REF
4993 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
4994 && TREE_CODE (scalar_dest) != INDIRECT_REF
4995 && TREE_CODE (scalar_dest) != COMPONENT_REF
4996 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
4997 && TREE_CODE (scalar_dest) != REALPART_EXPR
4998 && TREE_CODE (scalar_dest) != MEM_REF)
4999 return false;
5001 gcc_assert (gimple_assign_single_p (stmt));
5002 op = gimple_assign_rhs1 (stmt);
5003 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5004 &def, &dt))
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5008 "use not simple.\n");
5009 return false;
5012 elem_type = TREE_TYPE (vectype);
5013 vec_mode = TYPE_MODE (vectype);
5015 /* FORNOW. In some cases we can vectorize even if the data type is not
5016 supported (e.g. array initialization with 0). */
5017 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5018 return false;
5020 if (!STMT_VINFO_DATA_REF (stmt_info))
5021 return false;
5023 negative =
5024 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5025 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5026 size_zero_node) < 0;
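/* A negative step arises e.g. when the loop stores to a[i] with i
   counting downwards; the generated vectors must then be reversed
   before being stored.  */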
5027 if (negative && ncopies > 1)
5029 if (dump_enabled_p ())
5030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5031 "multiple types with negative step.\n");
5032 return false;
5035 if (negative)
5037 gcc_assert (!grouped_store);
5038 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5039 if (alignment_support_scheme != dr_aligned
5040 && alignment_support_scheme != dr_unaligned_supported)
5042 if (dump_enabled_p ())
5043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5044 "negative step but alignment required.\n");
5045 return false;
5047 if (dt != vect_constant_def
5048 && dt != vect_external_def
5049 && !perm_mask_for_reverse (vectype))
5051 if (dump_enabled_p ())
5052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5053 "negative step and reversing not supported.\n");
5054 return false;
5058 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5060 grouped_store = true;
5061 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5062 if (!slp && !PURE_SLP_STMT (stmt_info))
5064 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5065 if (vect_store_lanes_supported (vectype, group_size))
5066 store_lanes_p = true;
5067 else if (!vect_grouped_store_supported (vectype, group_size))
5068 return false;
5071 if (first_stmt == stmt)
5073 /* STMT is the leader of the group. Check the operands of all the
5074 stmts of the group. */
5075 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5076 while (next_stmt)
5078 gcc_assert (gimple_assign_single_p (next_stmt));
5079 op = gimple_assign_rhs1 (next_stmt);
5080 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5081 &def_stmt, &def, &dt))
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "use not simple.\n");
5086 return false;
5088 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5093 if (!vec_stmt) /* transformation not required. */
5095 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5096 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5097 NULL, NULL, NULL);
5098 return true;
5101 /** Transform. **/
5103 ensure_base_align (stmt_info, dr);
5105 if (grouped_store)
5107 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5108 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5110 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5112 /* FORNOW */
5113 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5115 /* We vectorize all the stmts of the interleaving group when we
5116 reach the last stmt in the group. */
5117 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5118 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5119 && !slp)
5121 *vec_stmt = NULL;
5122 return true;
5125 if (slp)
5127 grouped_store = false;
5128 /* VEC_NUM is the number of vect stmts to be created for this
5129 group. */
5130 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5131 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5132 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5133 op = gimple_assign_rhs1 (first_stmt);
5135 else
5136 /* VEC_NUM is the number of vect stmts to be created for this
5137 group. */
5138 vec_num = group_size;
5140 else
5142 first_stmt = stmt;
5143 first_dr = dr;
5144 group_size = vec_num = 1;
5147 if (dump_enabled_p ())
5148 dump_printf_loc (MSG_NOTE, vect_location,
5149 "transform store. ncopies = %d\n", ncopies);
5151 dr_chain.create (group_size);
5152 oprnds.create (group_size);
5154 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5155 gcc_assert (alignment_support_scheme);
5156 /* Targets with store-lane instructions must not require explicit
5157 realignment. */
5158 gcc_assert (!store_lanes_p
5159 || alignment_support_scheme == dr_aligned
5160 || alignment_support_scheme == dr_unaligned_supported);
5162 if (negative)
5163 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5165 if (store_lanes_p)
5166 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5167 else
5168 aggr_type = vectype;
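/* For example, a group of three four-element stores handled by a
   store-lanes instruction uses a twelve-element array type here.  */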
5170 /* In case the vectorization factor (VF) is bigger than the number
5171 of elements that we can fit in a vectype (nunits), we have to generate
5172 more than one vector stmt, i.e., we need to "unroll" the
5173 vector stmt by a factor VF/nunits. For more details see documentation in
5174 vect_get_vec_def_for_copy_stmt. */
5176 /* In case of interleaving (non-unit grouped access):
5178 S1: &base + 2 = x2
5179 S2: &base = x0
5180 S3: &base + 1 = x1
5181 S4: &base + 3 = x3
5183 We create vectorized stores starting from base address (the access of the
5184 first stmt in the chain (S2 in the above example), when the last store stmt
5185 of the chain (S4) is reached:
5187 VS1: &base = vx2
5188 VS2: &base + vec_size*1 = vx0
5189 VS3: &base + vec_size*2 = vx1
5190 VS4: &base + vec_size*3 = vx3
5192 Then permutation statements are generated:
5194 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5195 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5198 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5199 (the order of the data-refs in the output of vect_permute_store_chain
5200 corresponds to the order of scalar stmts in the interleaving chain - see
5201 the documentation of vect_permute_store_chain()).
5203 In case of both multiple types and interleaving, the above vector stores and
5204 permutation stmts are created for every copy. The result vector stmts are
5205 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5206 STMT_VINFO_RELATED_STMT for the next copies.
5209 prev_stmt_info = NULL;
5210 for (j = 0; j < ncopies; j++)
5212 gimple new_stmt;
5214 if (j == 0)
5216 if (slp)
5218 /* Get vectorized arguments for SLP_NODE. */
5219 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5220 NULL, slp_node, -1);
5222 vec_oprnd = vec_oprnds[0];
5224 else
5226 /* For interleaved stores we collect vectorized defs for all the
5227 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5228 used as an input to vect_permute_store_chain(), and OPRNDS as
5229 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5231 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5232 OPRNDS are of size 1. */
5233 next_stmt = first_stmt;
5234 for (i = 0; i < group_size; i++)
5236 /* Since gaps are not supported for interleaved stores,
5237 GROUP_SIZE is the exact number of stmts in the chain.
5238 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5239 there is no interleaving, GROUP_SIZE is 1, and only one
5240 iteration of the loop will be executed. */
5241 gcc_assert (next_stmt
5242 && gimple_assign_single_p (next_stmt));
5243 op = gimple_assign_rhs1 (next_stmt);
5245 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5246 NULL);
5247 dr_chain.quick_push (vec_oprnd);
5248 oprnds.quick_push (vec_oprnd);
5249 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5253 /* We should have caught mismatched types earlier. */
5254 gcc_assert (useless_type_conversion_p (vectype,
5255 TREE_TYPE (vec_oprnd)));
5256 bool simd_lane_access_p
5257 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5258 if (simd_lane_access_p
5259 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5260 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5261 && integer_zerop (DR_OFFSET (first_dr))
5262 && integer_zerop (DR_INIT (first_dr))
5263 && alias_sets_conflict_p (get_alias_set (aggr_type),
5264 get_alias_set (DR_REF (first_dr))))
5266 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5267 dataref_offset = build_int_cst (reference_alias_ptr_type
5268 (DR_REF (first_dr)), 0);
5269 inv_p = false;
5271 else
5272 dataref_ptr
5273 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5274 simd_lane_access_p ? loop : NULL,
5275 offset, &dummy, gsi, &ptr_incr,
5276 simd_lane_access_p, &inv_p);
5277 gcc_assert (bb_vinfo || !inv_p);
5279 else
5281 /* For interleaved stores we created vectorized defs for all the
5282 defs stored in OPRNDS in the previous iteration (previous copy).
5283 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5284 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5285 next copy.
5286 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5287 OPRNDS are of size 1. */
5288 for (i = 0; i < group_size; i++)
5290 op = oprnds[i];
5291 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5292 &def, &dt);
5293 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5294 dr_chain[i] = vec_oprnd;
5295 oprnds[i] = vec_oprnd;
5297 if (dataref_offset)
5298 dataref_offset
5299 = int_const_binop (PLUS_EXPR, dataref_offset,
5300 TYPE_SIZE_UNIT (aggr_type));
5301 else
5302 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5303 TYPE_SIZE_UNIT (aggr_type));
5306 if (store_lanes_p)
5308 tree vec_array;
5310 /* Combine all the vectors into an array. */
5311 vec_array = create_vector_array (vectype, vec_num);
5312 for (i = 0; i < vec_num; i++)
5314 vec_oprnd = dr_chain[i];
5315 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5318 /* Emit:
5319 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5320 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5321 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5322 gimple_call_set_lhs (new_stmt, data_ref);
5323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5325 else
5327 new_stmt = NULL;
5328 if (grouped_store)
5330 if (j == 0)
5331 result_chain.create (group_size);
5332 /* Permute. */
5333 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5334 &result_chain);
5337 next_stmt = first_stmt;
5338 for (i = 0; i < vec_num; i++)
5340 unsigned align, misalign;
5342 if (i > 0)
5343 /* Bump the vector pointer. */
5344 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5345 stmt, NULL_TREE);
5347 if (slp)
5348 vec_oprnd = vec_oprnds[i];
5349 else if (grouped_store)
5350 /* For grouped stores vectorized defs are interleaved in
5351 vect_permute_store_chain(). */
5352 vec_oprnd = result_chain[i];
5354 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5355 dataref_offset
5356 ? dataref_offset
5357 : build_int_cst (reference_alias_ptr_type
5358 (DR_REF (first_dr)), 0));
5359 align = TYPE_ALIGN_UNIT (vectype);
5360 if (aligned_access_p (first_dr))
5361 misalign = 0;
5362 else if (DR_MISALIGNMENT (first_dr) == -1)
5364 TREE_TYPE (data_ref)
5365 = build_aligned_type (TREE_TYPE (data_ref),
5366 TYPE_ALIGN (elem_type));
5367 align = TYPE_ALIGN_UNIT (elem_type);
5368 misalign = 0;
5370 else
5372 TREE_TYPE (data_ref)
5373 = build_aligned_type (TREE_TYPE (data_ref),
5374 TYPE_ALIGN (elem_type));
5375 misalign = DR_MISALIGNMENT (first_dr);
5377 if (dataref_offset == NULL_TREE)
5378 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5379 misalign);
5381 if (negative
5382 && dt != vect_constant_def
5383 && dt != vect_external_def)
5385 tree perm_mask = perm_mask_for_reverse (vectype);
5386 tree perm_dest
5387 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5388 vectype);
5389 tree new_temp = make_ssa_name (perm_dest, NULL);
5391 /* Generate the permute statement. */
5392 gimple perm_stmt
5393 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5394 vec_oprnd, vec_oprnd,
5395 perm_mask);
5396 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5398 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5399 vec_oprnd = new_temp;
5402 /* Arguments are ready. Create the new vector stmt. */
5403 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5404 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5406 if (slp)
5407 continue;
5409 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5410 if (!next_stmt)
5411 break;
5414 if (!slp)
5416 if (j == 0)
5417 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5418 else
5419 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5420 prev_stmt_info = vinfo_for_stmt (new_stmt);
5424 dr_chain.release ();
5425 oprnds.release ();
5426 result_chain.release ();
5427 vec_oprnds.release ();
5429 return true;
5432 /* Given a vector type VECTYPE and a permutation SEL, returns
5433 the VECTOR_CST mask that implements the permutation of the
5434 vector elements. If that is impossible to do, returns NULL. */
5436 tree
5437 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5439 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5440 int i, nunits;
5442 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5444 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5445 return NULL;
5447 mask_elt_type = lang_hooks.types.type_for_mode
5448 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5449 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5451 mask_elts = XALLOCAVEC (tree, nunits);
5452 for (i = nunits - 1; i >= 0; i--)
5453 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5454 mask_vec = build_vector (mask_type, mask_elts);
5456 return mask_vec;
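/* For example, for a four-element vector type and SEL = {3, 2, 1, 0}
   this returns the VECTOR_CST {3, 2, 1, 0} in the corresponding
   integer mask type, or NULL if the target cannot perform the
   permutation.  */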
5459 /* Given vector variables X and Y that were generated for the scalar
5460 STMT, generate instructions to permute the vector elements of X and Y
5461 using permutation mask MASK_VEC, insert them at *GSI and return the
5462 permuted vector variable. */
5464 static tree
5465 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5466 gimple_stmt_iterator *gsi)
5468 tree vectype = TREE_TYPE (x);
5469 tree perm_dest, data_ref;
5470 gimple perm_stmt;
5472 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5473 data_ref = make_ssa_name (perm_dest, NULL);
5475 /* Generate the permute statement. */
5476 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5477 x, y, mask_vec);
5478 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5480 return data_ref;
5483 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5484 inserting them on the loop's preheader edge. Returns true if we
5485 were successful in doing so (and thus STMT can then be moved),
5486 otherwise returns false. */
5488 static bool
5489 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5491 ssa_op_iter i;
5492 tree op;
5493 bool any = false;
5495 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5497 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5498 if (!gimple_nop_p (def_stmt)
5499 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5501 /* Make sure we don't need to recurse. While we could do
5502 so in simple cases, for more complex use webs
5503 we don't have an easy way to preserve stmt order to fulfil
5504 dependencies within them. */
5505 tree op2;
5506 ssa_op_iter i2;
5507 if (gimple_code (def_stmt) == GIMPLE_PHI)
5508 return false;
5509 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5511 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5512 if (!gimple_nop_p (def_stmt2)
5513 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5514 return false;
5516 any = true;
5520 if (!any)
5521 return true;
5523 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5525 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5526 if (!gimple_nop_p (def_stmt)
5527 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5529 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5530 gsi_remove (&gsi, false);
5531 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5535 return true;
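/* For example, for an invariant load whose address is computed by a
   statement inside the loop from loop-invariant operands only, that
   address computation is moved to the preheader so that the load
   itself can then be hoisted.  */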
5538 /* vectorizable_load.
5540 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5541 can be vectorized.
5542 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5543 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5544 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5546 static bool
5547 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5548 slp_tree slp_node, slp_instance slp_node_instance)
5550 tree scalar_dest;
5551 tree vec_dest = NULL;
5552 tree data_ref = NULL;
5553 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5554 stmt_vec_info prev_stmt_info;
5555 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5556 struct loop *loop = NULL;
5557 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5558 bool nested_in_vect_loop = false;
5559 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5560 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5561 tree elem_type;
5562 tree new_temp;
5563 enum machine_mode mode;
5564 gimple new_stmt = NULL;
5565 tree dummy;
5566 enum dr_alignment_support alignment_support_scheme;
5567 tree dataref_ptr = NULL_TREE;
5568 tree dataref_offset = NULL_TREE;
5569 gimple ptr_incr = NULL;
5570 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5571 int ncopies;
5572 int i, j, group_size, group_gap;
5573 tree msq = NULL_TREE, lsq;
5574 tree offset = NULL_TREE;
5575 tree realignment_token = NULL_TREE;
5576 gimple phi = NULL;
5577 vec<tree> dr_chain = vNULL;
5578 bool grouped_load = false;
5579 bool load_lanes_p = false;
5580 gimple first_stmt;
5581 bool inv_p;
5582 bool negative = false;
5583 bool compute_in_loop = false;
5584 struct loop *at_loop;
5585 int vec_num;
5586 bool slp = (slp_node != NULL);
5587 bool slp_perm = false;
5588 enum tree_code code;
5589 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5590 int vf;
5591 tree aggr_type;
5592 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5593 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5594 int gather_scale = 1;
5595 enum vect_def_type gather_dt = vect_unknown_def_type;
5597 if (loop_vinfo)
5599 loop = LOOP_VINFO_LOOP (loop_vinfo);
5600 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5601 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5603 else
5604 vf = 1;
5606 /* Multiple types in SLP are handled by creating the appropriate number of
5607 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5608 case of SLP. */
5609 if (slp || PURE_SLP_STMT (stmt_info))
5610 ncopies = 1;
5611 else
5612 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5614 gcc_assert (ncopies >= 1);
5616 /* FORNOW. This restriction should be relaxed. */
5617 if (nested_in_vect_loop && ncopies > 1)
5619 if (dump_enabled_p ())
5620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5621 "multiple types in nested loop.\n");
5622 return false;
5625 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5626 return false;
5628 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5629 return false;
5631 /* Is vectorizable load? */
5632 if (!is_gimple_assign (stmt))
5633 return false;
5635 scalar_dest = gimple_assign_lhs (stmt);
5636 if (TREE_CODE (scalar_dest) != SSA_NAME)
5637 return false;
5639 code = gimple_assign_rhs_code (stmt);
5640 if (code != ARRAY_REF
5641 && code != BIT_FIELD_REF
5642 && code != INDIRECT_REF
5643 && code != COMPONENT_REF
5644 && code != IMAGPART_EXPR
5645 && code != REALPART_EXPR
5646 && code != MEM_REF
5647 && TREE_CODE_CLASS (code) != tcc_declaration)
5648 return false;
5650 if (!STMT_VINFO_DATA_REF (stmt_info))
5651 return false;
5653 elem_type = TREE_TYPE (vectype);
5654 mode = TYPE_MODE (vectype);
5656 /* FORNOW. In some cases can vectorize even if data-type not supported
5657 (e.g. - data copies). */
5658 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5660 if (dump_enabled_p ())
5661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5662 "Aligned load, but unsupported type.\n");
5663 return false;
5666 /* Check if the load is a part of an interleaving chain. */
5667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5669 grouped_load = true;
5670 /* FORNOW */
5671 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5673 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5674 if (!slp && !PURE_SLP_STMT (stmt_info))
5676 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5677 if (vect_load_lanes_supported (vectype, group_size))
5678 load_lanes_p = true;
5679 else if (!vect_grouped_load_supported (vectype, group_size))
5680 return false;
5685 if (STMT_VINFO_GATHER_P (stmt_info))
5687 gimple def_stmt;
5688 tree def;
5689 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5690 &gather_off, &gather_scale);
5691 gcc_assert (gather_decl);
5692 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5693 &def_stmt, &def, &gather_dt,
5694 &gather_off_vectype))
5696 if (dump_enabled_p ())
5697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5698 "gather index use not simple.\n");
5699 return false;
5702 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5704 else
5706 negative = tree_int_cst_compare (nested_in_vect_loop
5707 ? STMT_VINFO_DR_STEP (stmt_info)
5708 : DR_STEP (dr),
5709 size_zero_node) < 0;
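      /* For example (illustrative element size), a scalar loop such as

           for (i = n - 1; i >= 0; i--)
             ... = a[i];

         over a 4-byte element type has a DR_STEP of -4, so NEGATIVE is
         set and each loaded vector later needs to be reversed with a
         permutation.  */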
5710 if (negative && ncopies > 1)
5712 if (dump_enabled_p ())
5713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5714 "multiple types with negative step.\n");
5715 return false;
5718 if (negative)
5720 if (grouped_load)
5722 if (dump_enabled_p ())
5723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5724 "negative step for group load not supported"
5725 "\n");
5726 return false;
5728 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5729 if (alignment_support_scheme != dr_aligned
5730 && alignment_support_scheme != dr_unaligned_supported)
5732 if (dump_enabled_p ())
5733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5734 "negative step but alignment required.\n");
5735 return false;
5737 if (!perm_mask_for_reverse (vectype))
5739 if (dump_enabled_p ())
5740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5741 "negative step and reversing not supported."
5742 "\n");
5743 return false;
5748 if (!vec_stmt) /* transformation not required. */
5750 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5751 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5752 return true;
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_NOTE, vect_location,
5757 "transform load. ncopies = %d\n", ncopies);
5759 /** Transform. **/
5761 ensure_base_align (stmt_info, dr);
5763 if (STMT_VINFO_GATHER_P (stmt_info))
5765 tree vec_oprnd0 = NULL_TREE, op;
5766 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5767 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5768 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5769 edge pe = loop_preheader_edge (loop);
5770 gimple_seq seq;
5771 basic_block new_bb;
5772 enum { NARROW, NONE, WIDEN } modifier;
5773 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5775 if (nunits == gather_off_nunits)
5776 modifier = NONE;
5777 else if (nunits == gather_off_nunits / 2)
5779 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5780 modifier = WIDEN;
5782 for (i = 0; i < gather_off_nunits; ++i)
5783 sel[i] = i | nunits;
5785 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5786 gcc_assert (perm_mask != NULL_TREE);
5788 else if (nunits == gather_off_nunits * 2)
5790 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5791 modifier = NARROW;
5793 for (i = 0; i < nunits; ++i)
5794 sel[i] = i < gather_off_nunits
5795 ? i : i + nunits - gather_off_nunits;
5797 perm_mask = vect_gen_perm_mask (vectype, sel);
5798 gcc_assert (perm_mask != NULL_TREE);
5799 ncopies *= 2;
5801 else
5802 gcc_unreachable ();
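      /* Illustrative examples of the permutation masks built above
         (element counts are hypothetical):

           WIDEN:  nunits == 4, gather_off_nunits == 8 gives
                   sel = { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the permutation
                   exposes the high half of the offset vector for the
                   second gather of each pair.

           NARROW: nunits == 8, gather_off_nunits == 4 gives
                   sel = { 0, 1, 2, 3, 8, 9, 10, 11 }, i.e. the permutation
                   concatenates the useful halves of two consecutive
                   gather results into one destination vector.  */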
5804 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5805 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5806 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5807 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5808 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5809 scaletype = TREE_VALUE (arglist);
5810 gcc_checking_assert (types_compatible_p (srctype, rettype));
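      /* The gather call built further down has the shape

           DEST = GATHER_DECL (MERGE, PTR, INDEX, MASK, SCALE)

         matching the argument list walked above.  On i386, for example,
         GATHER_DECL would be one of the gather builtins supplied by the
         target's builtin_gather hook (named here only as an illustration
         of a providing target).  */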
5812 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5814 ptr = fold_convert (ptrtype, gather_base);
5815 if (!is_gimple_min_invariant (ptr))
5817 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5818 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5819 gcc_assert (!new_bb);
5822 /* Currently we support only unconditional gather loads,
5823 so mask should be all ones. */
5824 if (TREE_CODE (masktype) == INTEGER_TYPE)
5825 mask = build_int_cst (masktype, -1);
5826 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5828 mask = build_int_cst (TREE_TYPE (masktype), -1);
5829 mask = build_vector_from_val (masktype, mask);
5830 mask = vect_init_vector (stmt, mask, masktype, NULL);
5832 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5834 REAL_VALUE_TYPE r;
5835 long tmp[6];
5836 for (j = 0; j < 6; ++j)
5837 tmp[j] = -1;
5838 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5839 mask = build_real (TREE_TYPE (masktype), r);
5840 mask = build_vector_from_val (masktype, mask);
5841 mask = vect_init_vector (stmt, mask, masktype, NULL);
5843 else
5844 gcc_unreachable ();
5846 scale = build_int_cst (scaletype, gather_scale);
5848 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5849 merge = build_int_cst (TREE_TYPE (rettype), 0);
5850 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5852 REAL_VALUE_TYPE r;
5853 long tmp[6];
5854 for (j = 0; j < 6; ++j)
5855 tmp[j] = 0;
5856 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5857 merge = build_real (TREE_TYPE (rettype), r);
5859 else
5860 gcc_unreachable ();
5861 merge = build_vector_from_val (rettype, merge);
5862 merge = vect_init_vector (stmt, merge, rettype, NULL);
5864 prev_stmt_info = NULL;
5865 for (j = 0; j < ncopies; ++j)
5867 if (modifier == WIDEN && (j & 1))
5868 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5869 perm_mask, stmt, gsi);
5870 else if (j == 0)
5871 op = vec_oprnd0
5872 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5873 else
5874 op = vec_oprnd0
5875 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5877 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5879 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5880 == TYPE_VECTOR_SUBPARTS (idxtype));
5881 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5882 var = make_ssa_name (var, NULL);
5883 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5884 new_stmt
5885 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5886 op, NULL_TREE);
5887 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5888 op = var;
5891 new_stmt
5892 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5894 if (!useless_type_conversion_p (vectype, rettype))
5896 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5897 == TYPE_VECTOR_SUBPARTS (rettype));
5898 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5899 op = make_ssa_name (var, new_stmt);
5900 gimple_call_set_lhs (new_stmt, op);
5901 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5902 var = make_ssa_name (vec_dest, NULL);
5903 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5904 new_stmt
5905 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5906 NULL_TREE);
5908 else
5910 var = make_ssa_name (vec_dest, new_stmt);
5911 gimple_call_set_lhs (new_stmt, var);
5914 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5916 if (modifier == NARROW)
5918 if ((j & 1) == 0)
5920 prev_res = var;
5921 continue;
5923 var = permute_vec_elements (prev_res, var,
5924 perm_mask, stmt, gsi);
5925 new_stmt = SSA_NAME_DEF_STMT (var);
5928 if (prev_stmt_info == NULL)
5929 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5930 else
5931 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5932 prev_stmt_info = vinfo_for_stmt (new_stmt);
5934 return true;
5936 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5938 gimple_stmt_iterator incr_gsi;
5939 bool insert_after;
5940 gimple incr;
5941 tree offvar;
5942 tree ivstep;
5943 tree running_off;
5944 vec<constructor_elt, va_gc> *v = NULL;
5945 gimple_seq stmts = NULL;
5946 tree stride_base, stride_step, alias_off;
5948 gcc_assert (!nested_in_vect_loop);
5950 stride_base
5951 = fold_build_pointer_plus
5952 (unshare_expr (DR_BASE_ADDRESS (dr)),
5953 size_binop (PLUS_EXPR,
5954 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5955 convert_to_ptrofftype (DR_INIT (dr))));
5956 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5958 /* For a load with loop-invariant (but other than power-of-2)
5959 stride (i.e. not a grouped access) like so:
5961 for (i = 0; i < n; i += stride)
5962 ... = array[i];
5964 we generate a new induction variable and new accesses to
5965 form a new vector (or vectors, depending on ncopies):
5967 for (j = 0; ; j += VF*stride)
5968 tmp1 = array[j];
5969 tmp2 = array[j + stride];
5971 vectemp = {tmp1, tmp2, ...}
5974 ivstep = stride_step;
5975 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5976 build_int_cst (TREE_TYPE (ivstep), vf));
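      /* Concretely (an illustrative case, not generated verbatim): with
         four 4-byte elements per vector and a stride of 3 elements,
         each group of scalar loads below reads array[j], array[j + 3],
         array[j + 6] and array[j + 9] through RUNNING_OFF, gathers them
         into a CONSTRUCTOR to build one vector, and IVSTEP advances the
         induction variable by vf * DR_STEP bytes (48 bytes in this
         example) per iteration of the vectorized loop.  */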
5978 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5980 create_iv (stride_base, ivstep, NULL,
5981 loop, &incr_gsi, insert_after,
5982 &offvar, NULL);
5983 incr = gsi_stmt (incr_gsi);
5984 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5986 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5987 if (stmts)
5988 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5990 prev_stmt_info = NULL;
5991 running_off = offvar;
5992 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5993 for (j = 0; j < ncopies; j++)
5995 tree vec_inv;
5997 vec_alloc (v, nunits);
5998 for (i = 0; i < nunits; i++)
6000 tree newref, newoff;
6001 gimple incr;
6002 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6003 running_off, alias_off);
6005 newref = force_gimple_operand_gsi (gsi, newref, true,
6006 NULL_TREE, true,
6007 GSI_SAME_STMT);
6008 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6009 newoff = copy_ssa_name (running_off, NULL);
6010 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6011 running_off, stride_step);
6012 vect_finish_stmt_generation (stmt, incr, gsi);
6014 running_off = newoff;
6017 vec_inv = build_constructor (vectype, v);
6018 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6019 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6021 if (j == 0)
6022 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6023 else
6024 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6025 prev_stmt_info = vinfo_for_stmt (new_stmt);
6027 return true;
6030 if (grouped_load)
6032 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6033 if (slp
6034 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6035 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6036 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6038 /* Check if the chain of loads is already vectorized. */
6039 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6040 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6041 ??? But we can only do so if there is exactly one
6042 as we have no way to get at the rest. Leave the CSE
6043 opportunity alone.
6044 ??? With the group load eventually participating
6045 in multiple different permutations (having multiple
6046 slp nodes which refer to the same group) the CSE
6047 is even wrong code. See PR56270. */
6048 && !slp)
6050 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6051 return true;
6053 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6054 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6056 /* VEC_NUM is the number of vect stmts to be created for this group. */
6057 if (slp)
6059 grouped_load = false;
6060 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6061 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6062 slp_perm = true;
6063 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6065 else
6067 vec_num = group_size;
6068 group_gap = 0;
6071 else
6073 first_stmt = stmt;
6074 first_dr = dr;
6075 group_size = vec_num = 1;
6076 group_gap = 0;
6079 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6080 gcc_assert (alignment_support_scheme);
6081 /* Targets with load-lane instructions must not require explicit
6082 realignment. */
6083 gcc_assert (!load_lanes_p
6084 || alignment_support_scheme == dr_aligned
6085 || alignment_support_scheme == dr_unaligned_supported);
6087 /* In case the vectorization factor (VF) is bigger than the number
6088 of elements that we can fit in a vectype (nunits), we have to generate
6089 more than one vector stmt - i.e - we need to "unroll" the
6090 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6091 from one copy of the vector stmt to the next, in the field
6092 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6093 stages to find the correct vector defs to be used when vectorizing
6094 stmts that use the defs of the current stmt. The example below
6095 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6096 need to create 4 vectorized stmts):
6098 before vectorization:
6099 RELATED_STMT VEC_STMT
6100 S1: x = memref - -
6101 S2: z = x + 1 - -
6103 step 1: vectorize stmt S1:
6104 We first create the vector stmt VS1_0, and, as usual, record a
6105 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6106 Next, we create the vector stmt VS1_1, and record a pointer to
6107 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6108 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6109 stmts and pointers:
6110 RELATED_STMT VEC_STMT
6111 VS1_0: vx0 = memref0 VS1_1 -
6112 VS1_1: vx1 = memref1 VS1_2 -
6113 VS1_2: vx2 = memref2 VS1_3 -
6114 VS1_3: vx3 = memref3 - -
6115 S1: x = load - VS1_0
6116 S2: z = x + 1 - -
6118 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6119 information we recorded in RELATED_STMT field is used to vectorize
6120 stmt S2. */
6122 /* In case of interleaving (non-unit grouped access):
6124 S1: x2 = &base + 2
6125 S2: x0 = &base
6126 S3: x1 = &base + 1
6127 S4: x3 = &base + 3
6129 Vectorized loads are created in the order of memory accesses
6130 starting from the access of the first stmt of the chain:
6132 VS1: vx0 = &base
6133 VS2: vx1 = &base + vec_size*1
6134 VS3: vx3 = &base + vec_size*2
6135 VS4: vx4 = &base + vec_size*3
6137 Then permutation statements are generated:
6139 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6140 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6143 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6144 (the order of the data-refs in the output of vect_permute_load_chain
6145 corresponds to the order of scalar stmts in the interleaving chain - see
6146 the documentation of vect_permute_load_chain()).
6147 The generation of permutation stmts and recording them in
6148 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6150 In case of both multiple types and interleaving, the vector loads and
6151 permutation stmts above are created for every copy. The result vector
6152 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6153 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6155 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6156 on a target that supports unaligned accesses (dr_unaligned_supported)
6157 we generate the following code:
6158 p = initial_addr;
6159 indx = 0;
6160 loop {
6161 p = p + indx * vectype_size;
6162 vec_dest = *(p);
6163 indx = indx + 1;
6166 Otherwise, the data reference is potentially unaligned on a target that
6167 does not support unaligned accesses (dr_explicit_realign_optimized) -
6168 then generate the following code, in which the data in each iteration is
6169 obtained by two vector loads, one from the previous iteration, and one
6170 from the current iteration:
6171 p1 = initial_addr;
6172 msq_init = *(floor(p1))
6173 p2 = initial_addr + VS - 1;
6174 realignment_token = call target_builtin;
6175 indx = 0;
6176 loop {
6177 p2 = p2 + indx * vectype_size
6178 lsq = *(floor(p2))
6179 vec_dest = realign_load (msq, lsq, realignment_token)
6180 indx = indx + 1;
6181 msq = lsq;
6182 } */
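  /* For illustration: with 16-byte vectors and an address misaligned by
     4 bytes, msq covers bytes [addr - 4, addr + 12) and lsq covers bytes
     [addr + 12, addr + 28); REALIGN_LOAD then uses the realignment token
     to extract the 16 wanted bytes starting at addr from that pair.
     (A sketch of the dr_explicit_realign* schemes, not a statement about
     any particular target's shift semantics.)  */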
6184 /* If the misalignment remains the same throughout the execution of the
6185 loop, we can create the init_addr and permutation mask at the loop
6186 preheader. Otherwise, it needs to be created inside the loop.
6187 This can only occur when vectorizing memory accesses in the inner-loop
6188 nested within an outer-loop that is being vectorized. */
6190 if (nested_in_vect_loop
6191 && (TREE_INT_CST_LOW (DR_STEP (dr))
6192 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6194 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6195 compute_in_loop = true;
6198 if ((alignment_support_scheme == dr_explicit_realign_optimized
6199 || alignment_support_scheme == dr_explicit_realign)
6200 && !compute_in_loop)
6202 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6203 alignment_support_scheme, NULL_TREE,
6204 &at_loop);
6205 if (alignment_support_scheme == dr_explicit_realign_optimized)
6207 phi = SSA_NAME_DEF_STMT (msq);
6208 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6211 else
6212 at_loop = loop;
6214 if (negative)
6215 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6217 if (load_lanes_p)
6218 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6219 else
6220 aggr_type = vectype;
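  /* E.g. for a load-lanes access with GROUP_SIZE == 3 and four-element
     vectors, AGGR_TYPE is a twelve-element array of the element type and
     the IFN_LOAD_LANES call below fills three vectors from it at once;
     without load-lanes AGGR_TYPE is just the vector type itself.
     (Element counts chosen only for illustration.)  */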
6222 prev_stmt_info = NULL;
6223 for (j = 0; j < ncopies; j++)
6225 /* 1. Create the vector or array pointer update chain. */
6226 if (j == 0)
6228 bool simd_lane_access_p
6229 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6230 if (simd_lane_access_p
6231 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6232 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6233 && integer_zerop (DR_OFFSET (first_dr))
6234 && integer_zerop (DR_INIT (first_dr))
6235 && alias_sets_conflict_p (get_alias_set (aggr_type),
6236 get_alias_set (DR_REF (first_dr)))
6237 && (alignment_support_scheme == dr_aligned
6238 || alignment_support_scheme == dr_unaligned_supported))
6240 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6241 dataref_offset = build_int_cst (reference_alias_ptr_type
6242 (DR_REF (first_dr)), 0);
6243 inv_p = false;
6245 else
6246 dataref_ptr
6247 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6248 offset, &dummy, gsi, &ptr_incr,
6249 simd_lane_access_p, &inv_p);
6251 else if (dataref_offset)
6252 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6253 TYPE_SIZE_UNIT (aggr_type));
6254 else
6255 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6256 TYPE_SIZE_UNIT (aggr_type));
6258 if (grouped_load || slp_perm)
6259 dr_chain.create (vec_num);
6261 if (load_lanes_p)
6263 tree vec_array;
6265 vec_array = create_vector_array (vectype, vec_num);
6267 /* Emit:
6268 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6269 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6270 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6271 gimple_call_set_lhs (new_stmt, vec_array);
6272 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6274 /* Extract each vector into an SSA_NAME. */
6275 for (i = 0; i < vec_num; i++)
6277 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6278 vec_array, i);
6279 dr_chain.quick_push (new_temp);
6282 /* Record the mapping between SSA_NAMEs and statements. */
6283 vect_record_grouped_load_vectors (stmt, dr_chain);
6285 else
6287 for (i = 0; i < vec_num; i++)
6289 if (i > 0)
6290 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6291 stmt, NULL_TREE);
6293 /* 2. Create the vector-load in the loop. */
6294 switch (alignment_support_scheme)
6296 case dr_aligned:
6297 case dr_unaligned_supported:
6299 unsigned int align, misalign;
6301 data_ref
6302 = build2 (MEM_REF, vectype, dataref_ptr,
6303 dataref_offset
6304 ? dataref_offset
6305 : build_int_cst (reference_alias_ptr_type
6306 (DR_REF (first_dr)), 0));
6307 align = TYPE_ALIGN_UNIT (vectype);
6308 if (alignment_support_scheme == dr_aligned)
6310 gcc_assert (aligned_access_p (first_dr));
6311 misalign = 0;
6313 else if (DR_MISALIGNMENT (first_dr) == -1)
6315 TREE_TYPE (data_ref)
6316 = build_aligned_type (TREE_TYPE (data_ref),
6317 TYPE_ALIGN (elem_type));
6318 align = TYPE_ALIGN_UNIT (elem_type);
6319 misalign = 0;
6321 else
6323 TREE_TYPE (data_ref)
6324 = build_aligned_type (TREE_TYPE (data_ref),
6325 TYPE_ALIGN (elem_type));
6326 misalign = DR_MISALIGNMENT (first_dr);
6328 if (dataref_offset == NULL_TREE)
6329 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6330 align, misalign);
6331 break;
6333 case dr_explicit_realign:
6335 tree ptr, bump;
6336 tree vs_minus_1;
6338 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6340 if (compute_in_loop)
6341 msq = vect_setup_realignment (first_stmt, gsi,
6342 &realignment_token,
6343 dr_explicit_realign,
6344 dataref_ptr, NULL);
6346 ptr = copy_ssa_name (dataref_ptr, NULL);
6347 new_stmt = gimple_build_assign_with_ops
6348 (BIT_AND_EXPR, ptr, dataref_ptr,
6349 build_int_cst
6350 (TREE_TYPE (dataref_ptr),
6351 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6352 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6353 data_ref
6354 = build2 (MEM_REF, vectype, ptr,
6355 build_int_cst (reference_alias_ptr_type
6356 (DR_REF (first_dr)), 0));
6357 vec_dest = vect_create_destination_var (scalar_dest,
6358 vectype);
6359 new_stmt = gimple_build_assign (vec_dest, data_ref);
6360 new_temp = make_ssa_name (vec_dest, new_stmt);
6361 gimple_assign_set_lhs (new_stmt, new_temp);
6362 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6363 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6364 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6365 msq = new_temp;
6367 bump = size_binop (MULT_EXPR, vs_minus_1,
6368 TYPE_SIZE_UNIT (elem_type));
6369 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6370 new_stmt = gimple_build_assign_with_ops
6371 (BIT_AND_EXPR, NULL_TREE, ptr,
6372 build_int_cst
6373 (TREE_TYPE (ptr),
6374 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6375 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6376 gimple_assign_set_lhs (new_stmt, ptr);
6377 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6378 data_ref
6379 = build2 (MEM_REF, vectype, ptr,
6380 build_int_cst (reference_alias_ptr_type
6381 (DR_REF (first_dr)), 0));
6382 break;
6384 case dr_explicit_realign_optimized:
6385 new_temp = copy_ssa_name (dataref_ptr, NULL);
6386 new_stmt = gimple_build_assign_with_ops
6387 (BIT_AND_EXPR, new_temp, dataref_ptr,
6388 build_int_cst
6389 (TREE_TYPE (dataref_ptr),
6390 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6391 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6392 data_ref
6393 = build2 (MEM_REF, vectype, new_temp,
6394 build_int_cst (reference_alias_ptr_type
6395 (DR_REF (first_dr)), 0));
6396 break;
6397 default:
6398 gcc_unreachable ();
6400 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6401 new_stmt = gimple_build_assign (vec_dest, data_ref);
6402 new_temp = make_ssa_name (vec_dest, new_stmt);
6403 gimple_assign_set_lhs (new_stmt, new_temp);
6404 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6406 /* 3. Handle explicit realignment if necessary/supported.
6407 Create in loop:
6408 vec_dest = realign_load (msq, lsq, realignment_token) */
6409 if (alignment_support_scheme == dr_explicit_realign_optimized
6410 || alignment_support_scheme == dr_explicit_realign)
6412 lsq = gimple_assign_lhs (new_stmt);
6413 if (!realignment_token)
6414 realignment_token = dataref_ptr;
6415 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6416 new_stmt
6417 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6418 vec_dest, msq, lsq,
6419 realignment_token);
6420 new_temp = make_ssa_name (vec_dest, new_stmt);
6421 gimple_assign_set_lhs (new_stmt, new_temp);
6422 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6424 if (alignment_support_scheme == dr_explicit_realign_optimized)
6426 gcc_assert (phi);
6427 if (i == vec_num - 1 && j == ncopies - 1)
6428 add_phi_arg (phi, lsq,
6429 loop_latch_edge (containing_loop),
6430 UNKNOWN_LOCATION);
6431 msq = lsq;
6435 /* 4. Handle invariant-load. */
6436 if (inv_p && !bb_vinfo)
6438 gcc_assert (!grouped_load);
6439 /* If we have versioned for aliasing or the loop doesn't
6440 have any data dependencies that would preclude this,
6441 then we are sure this is a loop invariant load and
6442 thus we can insert it on the preheader edge. */
6443 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6444 && !nested_in_vect_loop
6445 && hoist_defs_of_uses (stmt, loop))
6447 if (dump_enabled_p ())
6449 dump_printf_loc (MSG_NOTE, vect_location,
6450 "hoisting out of the vectorized "
6451 "loop: ");
6452 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6453 dump_printf (MSG_NOTE, "\n");
6455 tree tem = copy_ssa_name (scalar_dest, NULL);
6456 gsi_insert_on_edge_immediate
6457 (loop_preheader_edge (loop),
6458 gimple_build_assign (tem,
6459 unshare_expr
6460 (gimple_assign_rhs1 (stmt))));
6461 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6463 else
6465 gimple_stmt_iterator gsi2 = *gsi;
6466 gsi_next (&gsi2);
6467 new_temp = vect_init_vector (stmt, scalar_dest,
6468 vectype, &gsi2);
6470 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6471 set_vinfo_for_stmt (new_stmt,
6472 new_stmt_vec_info (new_stmt, loop_vinfo,
6473 bb_vinfo));
6476 if (negative)
6478 tree perm_mask = perm_mask_for_reverse (vectype);
6479 new_temp = permute_vec_elements (new_temp, new_temp,
6480 perm_mask, stmt, gsi);
6481 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6484 /* Collect vector loads and later create their permutation in
6485 vect_transform_grouped_load (). */
6486 if (grouped_load || slp_perm)
6487 dr_chain.quick_push (new_temp);
6489 /* Store vector loads in the corresponding SLP_NODE. */
6490 if (slp && !slp_perm)
6491 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6493 /* Bump the vector pointer to account for a gap. */
6494 if (slp && group_gap != 0)
6496 tree bump = size_binop (MULT_EXPR,
6497 TYPE_SIZE_UNIT (elem_type),
6498 size_int (group_gap));
6499 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6500 stmt, bump);
6504 if (slp && !slp_perm)
6505 continue;
6507 if (slp_perm)
6509 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6510 slp_node_instance, false))
6512 dr_chain.release ();
6513 return false;
6516 else
6518 if (grouped_load)
6520 if (!load_lanes_p)
6521 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6522 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6524 else
6526 if (j == 0)
6527 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6528 else
6529 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6530 prev_stmt_info = vinfo_for_stmt (new_stmt);
6533 dr_chain.release ();
6536 return true;
6539 /* Function vect_is_simple_cond.
6541 Input:
6542 LOOP - the loop that is being vectorized.
6543 COND - Condition that is checked for simple use.
6545 Output:
6546 *COMP_VECTYPE - the vector type for the comparison.
6548 Returns whether a COND can be vectorized. Checks whether
6549 condition operands are supportable using vect_is_simple_use. */
6551 static bool
6552 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6553 bb_vec_info bb_vinfo, tree *comp_vectype)
6555 tree lhs, rhs;
6556 tree def;
6557 enum vect_def_type dt;
6558 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6560 if (!COMPARISON_CLASS_P (cond))
6561 return false;
6563 lhs = TREE_OPERAND (cond, 0);
6564 rhs = TREE_OPERAND (cond, 1);
6566 if (TREE_CODE (lhs) == SSA_NAME)
6568 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6569 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6570 &lhs_def_stmt, &def, &dt, &vectype1))
6571 return false;
6573 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6574 && TREE_CODE (lhs) != FIXED_CST)
6575 return false;
6577 if (TREE_CODE (rhs) == SSA_NAME)
6579 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6580 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6581 &rhs_def_stmt, &def, &dt, &vectype2))
6582 return false;
6584 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6585 && TREE_CODE (rhs) != FIXED_CST)
6586 return false;
6588 *comp_vectype = vectype1 ? vectype1 : vectype2;
6589 return true;
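/* For example, a condition such as a_1 < b_2, where both operands are SSA
   names with simple (vectorizable) definitions, or x_3 != 0.0 with a
   REAL_CST on one side, is accepted above; a condition whose operand is,
   say, an ADDR_EXPR is rejected.  (Examples only; the checks in
   vect_is_simple_cond are authoritative.)  */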
6592 /* vectorizable_condition.
6594 Check if STMT is conditional modify expression that can be vectorized.
6595 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6596 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6597 at GSI.
6599 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6600 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6601 else clause if it is 2).
6603 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6605 bool
6606 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6607 gimple *vec_stmt, tree reduc_def, int reduc_index,
6608 slp_tree slp_node)
6610 tree scalar_dest = NULL_TREE;
6611 tree vec_dest = NULL_TREE;
6612 tree cond_expr, then_clause, else_clause;
6613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6614 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6615 tree comp_vectype = NULL_TREE;
6616 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6617 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6618 tree vec_compare, vec_cond_expr;
6619 tree new_temp;
6620 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6621 tree def;
6622 enum vect_def_type dt, dts[4];
6623 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6624 int ncopies;
6625 enum tree_code code;
6626 stmt_vec_info prev_stmt_info = NULL;
6627 int i, j;
6628 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6629 vec<tree> vec_oprnds0 = vNULL;
6630 vec<tree> vec_oprnds1 = vNULL;
6631 vec<tree> vec_oprnds2 = vNULL;
6632 vec<tree> vec_oprnds3 = vNULL;
6633 tree vec_cmp_type;
6635 if (slp_node || PURE_SLP_STMT (stmt_info))
6636 ncopies = 1;
6637 else
6638 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6640 gcc_assert (ncopies >= 1);
6641 if (reduc_index && ncopies > 1)
6642 return false; /* FORNOW */
6644 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6645 return false;
6647 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6648 return false;
6650 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6651 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6652 && reduc_def))
6653 return false;
6655 /* FORNOW: not yet supported. */
6656 if (STMT_VINFO_LIVE_P (stmt_info))
6658 if (dump_enabled_p ())
6659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6660 "value used after loop.\n");
6661 return false;
6664 /* Is vectorizable conditional operation? */
6665 if (!is_gimple_assign (stmt))
6666 return false;
6668 code = gimple_assign_rhs_code (stmt);
6670 if (code != COND_EXPR)
6671 return false;
6673 cond_expr = gimple_assign_rhs1 (stmt);
6674 then_clause = gimple_assign_rhs2 (stmt);
6675 else_clause = gimple_assign_rhs3 (stmt);
6677 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6678 &comp_vectype)
6679 || !comp_vectype)
6680 return false;
6682 if (TREE_CODE (then_clause) == SSA_NAME)
6684 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6685 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6686 &then_def_stmt, &def, &dt))
6687 return false;
6689 else if (TREE_CODE (then_clause) != INTEGER_CST
6690 && TREE_CODE (then_clause) != REAL_CST
6691 && TREE_CODE (then_clause) != FIXED_CST)
6692 return false;
6694 if (TREE_CODE (else_clause) == SSA_NAME)
6696 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6697 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6698 &else_def_stmt, &def, &dt))
6699 return false;
6701 else if (TREE_CODE (else_clause) != INTEGER_CST
6702 && TREE_CODE (else_clause) != REAL_CST
6703 && TREE_CODE (else_clause) != FIXED_CST)
6704 return false;
6706 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6707 /* The result of a vector comparison should be of signed integer type. */
6708 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6709 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6710 if (vec_cmp_type == NULL_TREE)
6711 return false;
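  /* For instance, when VECTYPE is a vector of four floats the comparison
     element type built above is a 32-bit signed integer and VEC_CMP_TYPE
     is the corresponding four-element signed integer vector type,
     assuming the target provides a same-sized integer vector mode.  */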
6713 if (!vec_stmt)
6715 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6716 return expand_vec_cond_expr_p (vectype, comp_vectype);
6719 /* Transform. */
6721 if (!slp_node)
6723 vec_oprnds0.create (1);
6724 vec_oprnds1.create (1);
6725 vec_oprnds2.create (1);
6726 vec_oprnds3.create (1);
6729 /* Handle def. */
6730 scalar_dest = gimple_assign_lhs (stmt);
6731 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6733 /* Handle cond expr. */
6734 for (j = 0; j < ncopies; j++)
6736 gimple new_stmt = NULL;
6737 if (j == 0)
6739 if (slp_node)
6741 auto_vec<tree, 4> ops;
6742 auto_vec<vec<tree>, 4> vec_defs;
6744 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6745 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6746 ops.safe_push (then_clause);
6747 ops.safe_push (else_clause);
6748 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6749 vec_oprnds3 = vec_defs.pop ();
6750 vec_oprnds2 = vec_defs.pop ();
6751 vec_oprnds1 = vec_defs.pop ();
6752 vec_oprnds0 = vec_defs.pop ();
6754 ops.release ();
6755 vec_defs.release ();
6757 else
6759 gimple gtemp;
6760 vec_cond_lhs =
6761 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6762 stmt, NULL);
6763 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6764 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6766 vec_cond_rhs =
6767 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6768 stmt, NULL);
6769 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6770 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6771 if (reduc_index == 1)
6772 vec_then_clause = reduc_def;
6773 else
6775 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6776 stmt, NULL);
6777 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6778 NULL, &gtemp, &def, &dts[2]);
6780 if (reduc_index == 2)
6781 vec_else_clause = reduc_def;
6782 else
6784 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6785 stmt, NULL);
6786 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6787 NULL, &gtemp, &def, &dts[3]);
6791 else
6793 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6794 vec_oprnds0.pop ());
6795 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6796 vec_oprnds1.pop ());
6797 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6798 vec_oprnds2.pop ());
6799 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6800 vec_oprnds3.pop ());
6803 if (!slp_node)
6805 vec_oprnds0.quick_push (vec_cond_lhs);
6806 vec_oprnds1.quick_push (vec_cond_rhs);
6807 vec_oprnds2.quick_push (vec_then_clause);
6808 vec_oprnds3.quick_push (vec_else_clause);
6811 /* Arguments are ready. Create the new vector stmt. */
6812 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6814 vec_cond_rhs = vec_oprnds1[i];
6815 vec_then_clause = vec_oprnds2[i];
6816 vec_else_clause = vec_oprnds3[i];
6818 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6819 vec_cond_lhs, vec_cond_rhs);
6820 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6821 vec_compare, vec_then_clause, vec_else_clause);
6823 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6824 new_temp = make_ssa_name (vec_dest, new_stmt);
6825 gimple_assign_set_lhs (new_stmt, new_temp);
6826 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6827 if (slp_node)
6828 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6831 if (slp_node)
6832 continue;
6834 if (j == 0)
6835 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6836 else
6837 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6839 prev_stmt_info = vinfo_for_stmt (new_stmt);
6842 vec_oprnds0.release ();
6843 vec_oprnds1.release ();
6844 vec_oprnds2.release ();
6845 vec_oprnds3.release ();
6847 return true;
6851 /* Make sure the statement is vectorizable. */
6853 bool
6854 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6856 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6857 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6858 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6859 bool ok;
6860 tree scalar_type, vectype;
6861 gimple pattern_stmt;
6862 gimple_seq pattern_def_seq;
6864 if (dump_enabled_p ())
6866 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6867 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6868 dump_printf (MSG_NOTE, "\n");
6871 if (gimple_has_volatile_ops (stmt))
6873 if (dump_enabled_p ())
6874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6875 "not vectorized: stmt has volatile operands\n");
6877 return false;
6880 /* Skip stmts that do not need to be vectorized. In loops this is expected
6881 to include:
6882 - the COND_EXPR which is the loop exit condition
6883 - any LABEL_EXPRs in the loop
6884 - computations that are used only for array indexing or loop control.
6885 In basic blocks we only analyze statements that are a part of some SLP
6886 instance, therefore, all the statements are relevant.
6888 Pattern statement needs to be analyzed instead of the original statement
6889 if the original statement is not relevant. Otherwise, we analyze both
6890 statements. In basic blocks we are called from some SLP instance
6891 traversal, we don't analyze pattern stmts; the pattern stmts
6892 will already be part of the SLP instance. */
6894 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6895 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6896 && !STMT_VINFO_LIVE_P (stmt_info))
6898 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6899 && pattern_stmt
6900 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6901 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6903 /* Analyze PATTERN_STMT instead of the original stmt. */
6904 stmt = pattern_stmt;
6905 stmt_info = vinfo_for_stmt (pattern_stmt);
6906 if (dump_enabled_p ())
6908 dump_printf_loc (MSG_NOTE, vect_location,
6909 "==> examining pattern statement: ");
6910 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6911 dump_printf (MSG_NOTE, "\n");
6914 else
6916 if (dump_enabled_p ())
6917 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6919 return true;
6922 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6923 && node == NULL
6924 && pattern_stmt
6925 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6926 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6928 /* Analyze PATTERN_STMT too. */
6929 if (dump_enabled_p ())
6931 dump_printf_loc (MSG_NOTE, vect_location,
6932 "==> examining pattern statement: ");
6933 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6934 dump_printf (MSG_NOTE, "\n");
6937 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
6938 return false;
6941 if (is_pattern_stmt_p (stmt_info)
6942 && node == NULL
6943 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
6945 gimple_stmt_iterator si;
6947 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
6949 gimple pattern_def_stmt = gsi_stmt (si);
6950 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
6951 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
6953 /* Analyze def stmt of STMT if it's a pattern stmt. */
6954 if (dump_enabled_p ())
6956 dump_printf_loc (MSG_NOTE, vect_location,
6957 "==> examining pattern def statement: ");
6958 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
6959 dump_printf (MSG_NOTE, "\n");
6962 if (!vect_analyze_stmt (pattern_def_stmt,
6963 need_to_vectorize, node))
6964 return false;
6969 switch (STMT_VINFO_DEF_TYPE (stmt_info))
6971 case vect_internal_def:
6972 break;
6974 case vect_reduction_def:
6975 case vect_nested_cycle:
6976 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
6977 || relevance == vect_used_in_outer_by_reduction
6978 || relevance == vect_unused_in_scope));
6979 break;
6981 case vect_induction_def:
6982 case vect_constant_def:
6983 case vect_external_def:
6984 case vect_unknown_def_type:
6985 default:
6986 gcc_unreachable ();
6989 if (bb_vinfo)
6991 gcc_assert (PURE_SLP_STMT (stmt_info));
6993 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
6994 if (dump_enabled_p ())
6996 dump_printf_loc (MSG_NOTE, vect_location,
6997 "get vectype for scalar type: ");
6998 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6999 dump_printf (MSG_NOTE, "\n");
7002 vectype = get_vectype_for_scalar_type (scalar_type);
7003 if (!vectype)
7005 if (dump_enabled_p ())
7007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7008 "not SLPed: unsupported data-type ");
7009 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7010 scalar_type);
7011 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7013 return false;
7016 if (dump_enabled_p ())
7018 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7019 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7020 dump_printf (MSG_NOTE, "\n");
7023 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7026 if (STMT_VINFO_RELEVANT_P (stmt_info))
7028 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7029 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7030 || (is_gimple_call (stmt)
7031 && gimple_call_lhs (stmt) == NULL_TREE));
7032 *need_to_vectorize = true;
7035 ok = true;
7036 if (!bb_vinfo
7037 && (STMT_VINFO_RELEVANT_P (stmt_info)
7038 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7039 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7040 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7041 || vectorizable_shift (stmt, NULL, NULL, NULL)
7042 || vectorizable_operation (stmt, NULL, NULL, NULL)
7043 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7044 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7045 || vectorizable_call (stmt, NULL, NULL, NULL)
7046 || vectorizable_store (stmt, NULL, NULL, NULL)
7047 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7048 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7049 else
7051 if (bb_vinfo)
7052 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7053 || vectorizable_conversion (stmt, NULL, NULL, node)
7054 || vectorizable_shift (stmt, NULL, NULL, node)
7055 || vectorizable_operation (stmt, NULL, NULL, node)
7056 || vectorizable_assignment (stmt, NULL, NULL, node)
7057 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7058 || vectorizable_call (stmt, NULL, NULL, node)
7059 || vectorizable_store (stmt, NULL, NULL, node)
7060 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7063 if (!ok)
7065 if (dump_enabled_p ())
7067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7068 "not vectorized: relevant stmt not ");
7069 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7070 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7071 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7074 return false;
7077 if (bb_vinfo)
7078 return true;
7080 /* Stmts that are (also) "live" (i.e. that are used outside the loop)
7081 need extra handling, except for vectorizable reductions. */
7082 if (STMT_VINFO_LIVE_P (stmt_info)
7083 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7084 ok = vectorizable_live_operation (stmt, NULL, NULL);
7086 if (!ok)
7088 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7091 "not vectorized: live stmt not ");
7092 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7093 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7094 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7097 return false;
7100 return true;
7104 /* Function vect_transform_stmt.
7106 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7108 bool
7109 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7110 bool *grouped_store, slp_tree slp_node,
7111 slp_instance slp_node_instance)
7113 bool is_store = false;
7114 gimple vec_stmt = NULL;
7115 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7116 bool done;
7118 switch (STMT_VINFO_TYPE (stmt_info))
7120 case type_demotion_vec_info_type:
7121 case type_promotion_vec_info_type:
7122 case type_conversion_vec_info_type:
7123 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7124 gcc_assert (done);
7125 break;
7127 case induc_vec_info_type:
7128 gcc_assert (!slp_node);
7129 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7130 gcc_assert (done);
7131 break;
7133 case shift_vec_info_type:
7134 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7135 gcc_assert (done);
7136 break;
7138 case op_vec_info_type:
7139 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7140 gcc_assert (done);
7141 break;
7143 case assignment_vec_info_type:
7144 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7145 gcc_assert (done);
7146 break;
7148 case load_vec_info_type:
7149 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7150 slp_node_instance);
7151 gcc_assert (done);
7152 break;
7154 case store_vec_info_type:
7155 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7156 gcc_assert (done);
7157 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7159 /* In case of interleaving, the whole chain is vectorized when the
7160 last store in the chain is reached. Store stmts before the last
7161 one are skipped, and their vec_stmt_info shouldn't be freed
7162 meanwhile. */
7163 *grouped_store = true;
7164 if (STMT_VINFO_VEC_STMT (stmt_info))
7165 is_store = true;
7167 else
7168 is_store = true;
7169 break;
7171 case condition_vec_info_type:
7172 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7173 gcc_assert (done);
7174 break;
7176 case call_vec_info_type:
7177 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7178 stmt = gsi_stmt (*gsi);
7179 if (is_gimple_call (stmt)
7180 && gimple_call_internal_p (stmt)
7181 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7182 is_store = true;
7183 break;
7185 case call_simd_clone_vec_info_type:
7186 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7187 stmt = gsi_stmt (*gsi);
7188 break;
7190 case reduc_vec_info_type:
7191 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7192 gcc_assert (done);
7193 break;
7195 default:
7196 if (!STMT_VINFO_LIVE_P (stmt_info))
7198 if (dump_enabled_p ())
7199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7200 "stmt not supported.\n");
7201 gcc_unreachable ();
7205 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7206 is being vectorized, but outside the immediately enclosing loop. */
7207 if (vec_stmt
7208 && STMT_VINFO_LOOP_VINFO (stmt_info)
7209 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7210 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7211 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7212 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7213 || STMT_VINFO_RELEVANT (stmt_info) ==
7214 vect_used_in_outer_by_reduction))
7216 struct loop *innerloop = LOOP_VINFO_LOOP (
7217 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7218 imm_use_iterator imm_iter;
7219 use_operand_p use_p;
7220 tree scalar_dest;
7221 gimple exit_phi;
7223 if (dump_enabled_p ())
7224 dump_printf_loc (MSG_NOTE, vect_location,
7225 "Record the vdef for outer-loop vectorization.\n");
7227 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7228 (to be used when vectorizing outer-loop stmts that use the DEF of
7229 STMT). */
7230 if (gimple_code (stmt) == GIMPLE_PHI)
7231 scalar_dest = PHI_RESULT (stmt);
7232 else
7233 scalar_dest = gimple_assign_lhs (stmt);
7235 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7237 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7239 exit_phi = USE_STMT (use_p);
7240 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7245 /* Handle stmts whose DEF is used outside the loop-nest that is
7246 being vectorized. */
7247 if (STMT_VINFO_LIVE_P (stmt_info)
7248 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7250 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7251 gcc_assert (done);
7254 if (vec_stmt)
7255 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7257 return is_store;
7261 /* Remove a group of stores (for SLP or interleaving), free their
7262 stmt_vec_info. */
7264 void
7265 vect_remove_stores (gimple first_stmt)
7267 gimple next = first_stmt;
7268 gimple tmp;
7269 gimple_stmt_iterator next_si;
7271 while (next)
7273 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7275 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7276 if (is_pattern_stmt_p (stmt_info))
7277 next = STMT_VINFO_RELATED_STMT (stmt_info);
7278 /* Free the attached stmt_vec_info and remove the stmt. */
7279 next_si = gsi_for_stmt (next);
7280 unlink_stmt_vdef (next);
7281 gsi_remove (&next_si, true);
7282 release_defs (next);
7283 free_stmt_vec_info (next);
7284 next = tmp;
7289 /* Function new_stmt_vec_info.
7291 Create and initialize a new stmt_vec_info struct for STMT. */
7293 stmt_vec_info
7294 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7295 bb_vec_info bb_vinfo)
7297 stmt_vec_info res;
7298 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7300 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7301 STMT_VINFO_STMT (res) = stmt;
7302 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7303 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7304 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7305 STMT_VINFO_LIVE_P (res) = false;
7306 STMT_VINFO_VECTYPE (res) = NULL;
7307 STMT_VINFO_VEC_STMT (res) = NULL;
7308 STMT_VINFO_VECTORIZABLE (res) = true;
7309 STMT_VINFO_IN_PATTERN_P (res) = false;
7310 STMT_VINFO_RELATED_STMT (res) = NULL;
7311 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7312 STMT_VINFO_DATA_REF (res) = NULL;
7314 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7315 STMT_VINFO_DR_OFFSET (res) = NULL;
7316 STMT_VINFO_DR_INIT (res) = NULL;
7317 STMT_VINFO_DR_STEP (res) = NULL;
7318 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7320 if (gimple_code (stmt) == GIMPLE_PHI
7321 && is_loop_header_bb_p (gimple_bb (stmt)))
7322 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7323 else
7324 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7326 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7327 STMT_SLP_TYPE (res) = loop_vect;
7328 GROUP_FIRST_ELEMENT (res) = NULL;
7329 GROUP_NEXT_ELEMENT (res) = NULL;
7330 GROUP_SIZE (res) = 0;
7331 GROUP_STORE_COUNT (res) = 0;
7332 GROUP_GAP (res) = 0;
7333 GROUP_SAME_DR_STMT (res) = NULL;
7335 return res;
7339 /* Create a hash table for stmt_vec_info. */
7341 void
7342 init_stmt_vec_info_vec (void)
7344 gcc_assert (!stmt_vec_info_vec.exists ());
7345 stmt_vec_info_vec.create (50);
7349 /* Free hash table for stmt_vec_info. */
7351 void
7352 free_stmt_vec_info_vec (void)
7354 unsigned int i;
7355 vec_void_p info;
7356 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7357 if (info != NULL)
7358 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7359 gcc_assert (stmt_vec_info_vec.exists ());
7360 stmt_vec_info_vec.release ();
7364 /* Free stmt vectorization related info. */
7366 void
7367 free_stmt_vec_info (gimple stmt)
7369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7371 if (!stmt_info)
7372 return;
7374 /* Check if this statement has a related "pattern stmt"
7375 (introduced by the vectorizer during the pattern recognition
7376 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7377 too. */
7378 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7380 stmt_vec_info patt_info
7381 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7382 if (patt_info)
7384 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7385 if (seq)
7387 gimple_stmt_iterator si;
7388 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7389 free_stmt_vec_info (gsi_stmt (si));
7391 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
7395 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7396 set_vinfo_for_stmt (stmt, NULL);
7397 free (stmt_info);
7401 /* Function get_vectype_for_scalar_type_and_size.
7403 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7404 by the target. */
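/* E.g. for SCALAR_TYPE "int" and SIZE 16 this would typically yield a
   four-element integer vector type, while SIZE 0 lets the target choose
   its preferred SIMD width.  (The result is target dependent; the
   numbers are only an illustration.)  */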
7406 static tree
7407 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7409 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
7410 enum machine_mode simd_mode;
7411 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7412 int nunits;
7413 tree vectype;
7415 if (nbytes == 0)
7416 return NULL_TREE;
7418 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7419 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7420 return NULL_TREE;
7422 /* For vector types of elements whose mode precision doesn't
7423 match their type's precision we use an element type of mode
7424 precision. The vectorization routines will have to make sure
7425 they support the proper result truncation/extension.
7426 We also make sure to build vector types with INTEGER_TYPE
7427 component type only. */
7428 if (INTEGRAL_TYPE_P (scalar_type)
7429 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7430 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7431 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7432 TYPE_UNSIGNED (scalar_type));
7434 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7435 When the component mode passes the above test simply use a type
7436 corresponding to that mode. The theory is that any use that
7437 would cause problems with this will disable vectorization anyway. */
7438 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7439 && !INTEGRAL_TYPE_P (scalar_type))
7440 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7442 /* We can't build a vector type of elements with alignment bigger than
7443 their size. */
7444 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7445 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7446 TYPE_UNSIGNED (scalar_type));
7448 /* If we fell back to using the mode, fail if there was
7449 no scalar type for it. */
7450 if (scalar_type == NULL_TREE)
7451 return NULL_TREE;
7453 /* If no size was supplied use the mode the target prefers. Otherwise
7454 look up a vector mode of the specified size. */
7455 if (size == 0)
7456 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7457 else
7458 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7459 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7460 if (nunits <= 1)
7461 return NULL_TREE;
7463 vectype = build_vector_type (scalar_type, nunits);
7465 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7466 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7467 return NULL_TREE;
7469 return vectype;
7472 unsigned int current_vector_size;
7474 /* Function get_vectype_for_scalar_type.
7476 Returns the vector type corresponding to SCALAR_TYPE as supported
7477 by the target. */
7479 tree
7480 get_vectype_for_scalar_type (tree scalar_type)
7482 tree vectype;
7483 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7484 current_vector_size);
7485 if (vectype
7486 && current_vector_size == 0)
7487 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7488 return vectype;
7491 /* Function get_same_sized_vectype
7493 Returns a vector type corresponding to SCALAR_TYPE of size
7494 VECTOR_TYPE if supported by the target. */
7496 tree
7497 get_same_sized_vectype (tree scalar_type, tree vector_type)
7499 return get_vectype_for_scalar_type_and_size
7500 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7503 /* Function vect_is_simple_use.
7505 Input:
7506 LOOP_VINFO - the vect info of the loop that is being vectorized.
7507 BB_VINFO - the vect info of the basic block that is being vectorized.
7508 OPERAND - operand of STMT in the loop or bb.
7509 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7511 Returns whether a stmt with OPERAND can be vectorized.
7512 For loops, supportable operands are constants, loop invariants, and operands
7513 that are defined by the current iteration of the loop. Unsupportable
7514 operands are those that are defined by a previous iteration of the loop (as
7515 is the case in reduction/induction computations).
7516 For basic blocks, supportable operands are constants and bb invariants.
7517 For now, operands defined outside the basic block are not supported. */
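/* Informal classification examples: a literal constant yields
   vect_constant_def; a value defined before the loop (or another
   gimple invariant) yields vect_external_def; an SSA name defined by a
   statement inside the loop gets the def type recorded in that
   statement's stmt_vec_info, e.g. vect_internal_def for an ordinary
   computation.  */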
7519 bool
7520 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7521 bb_vec_info bb_vinfo, gimple *def_stmt,
7522 tree *def, enum vect_def_type *dt)
7524 basic_block bb;
7525 stmt_vec_info stmt_vinfo;
7526 struct loop *loop = NULL;
7528 if (loop_vinfo)
7529 loop = LOOP_VINFO_LOOP (loop_vinfo);
7531 *def_stmt = NULL;
7532 *def = NULL_TREE;
7534 if (dump_enabled_p ())
7536 dump_printf_loc (MSG_NOTE, vect_location,
7537 "vect_is_simple_use: operand ");
7538 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7539 dump_printf (MSG_NOTE, "\n");
7542 if (CONSTANT_CLASS_P (operand))
7544 *dt = vect_constant_def;
7545 return true;
7548 if (is_gimple_min_invariant (operand))
7550 *def = operand;
7551 *dt = vect_external_def;
7552 return true;
7555 if (TREE_CODE (operand) == PAREN_EXPR)
7557 if (dump_enabled_p ())
7558 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7559 operand = TREE_OPERAND (operand, 0);
7562 if (TREE_CODE (operand) != SSA_NAME)
7564 if (dump_enabled_p ())
7565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7566 "not ssa-name.\n");
7567 return false;
7570 *def_stmt = SSA_NAME_DEF_STMT (operand);
7571 if (*def_stmt == NULL)
7573 if (dump_enabled_p ())
7574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7575 "no def_stmt.\n");
7576 return false;
7579 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7582 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7583 dump_printf (MSG_NOTE, "\n");
7586 /* An empty (no-op) defining stmt is expected only for a function argument.
7587 (Otherwise we expect a PHI node or a GIMPLE_ASSIGN.) */
7588 if (gimple_nop_p (*def_stmt))
7590 *def = operand;
7591 *dt = vect_external_def;
7592 return true;
7595 bb = gimple_bb (*def_stmt);
7597 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7598 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7599 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7600 *dt = vect_external_def;
7601 else
7603 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7604 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7607 if (*dt == vect_unknown_def_type
7608 || (stmt
7609 && *dt == vect_double_reduction_def
7610 && gimple_code (stmt) != GIMPLE_PHI))
7612 if (dump_enabled_p ())
7613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7614 "Unsupported pattern.\n");
7615 return false;
7618 if (dump_enabled_p ())
7619 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7621 switch (gimple_code (*def_stmt))
7623 case GIMPLE_PHI:
7624 *def = gimple_phi_result (*def_stmt);
7625 break;
7627 case GIMPLE_ASSIGN:
7628 *def = gimple_assign_lhs (*def_stmt);
7629 break;
7631 case GIMPLE_CALL:
7632 *def = gimple_call_lhs (*def_stmt);
7633 if (*def != NULL)
7634 break;
7635 /* FALLTHRU */
7636 default:
7637 if (dump_enabled_p ())
7638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7639 "unsupported defining stmt:\n");
7640 return false;
7643 return true;
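/* Editorial illustration (not part of GCC): the operand kinds the
   function above classifies, shown on a plain scalar loop.  The names
   in the comments refer to the vect_def_type values used above; the
   loop itself is hypothetical, not code from this file.  */

static int
example_def_kinds (const int *a, int n, int scale)
{
  int i;
  int sum = 0;
  for (i = 0; i < n; i++)
    {
      /* SCALE is a loop invariant -> vect_external_def;
	 A[I] is defined by the current iteration -> vect_internal_def.  */
      int t = a[i] * scale;
      /* 3 is a constant -> vect_constant_def; SUM carries a value from
	 the previous iteration, i.e. a reduction, which needs special
	 handling rather than plain use-analysis.  */
      sum += t + 3;
    }
  return sum;
}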
7646 /* Function vect_is_simple_use_1.
7648 Same as vect_is_simple_use but also determines the vector operand
7649 type of OPERAND and stores it to *VECTYPE. If the definition of
7650 OPERAND is vect_uninitialized_def, vect_constant_def or
7651 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7652 is responsible for computing the best suited vector type for the
7653 scalar operand. */
7655 bool
7656 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7657 bb_vec_info bb_vinfo, gimple *def_stmt,
7658 tree *def, enum vect_def_type *dt, tree *vectype)
7660 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7661 def, dt))
7662 return false;
7664 /* Now get a vector type if the def is internal, otherwise supply
7665 NULL_TREE and leave it up to the caller to figure out a proper
7666 type for the use stmt. */
7667 if (*dt == vect_internal_def
7668 || *dt == vect_induction_def
7669 || *dt == vect_reduction_def
7670 || *dt == vect_double_reduction_def
7671 || *dt == vect_nested_cycle)
7673 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7675 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7676 && !STMT_VINFO_RELEVANT (stmt_info)
7677 && !STMT_VINFO_LIVE_P (stmt_info))
7678 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7680 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7681 gcc_assert (*vectype != NULL_TREE);
7683 else if (*dt == vect_uninitialized_def
7684 || *dt == vect_constant_def
7685 || *dt == vect_external_def)
7686 *vectype = NULL_TREE;
7687 else
7688 gcc_unreachable ();
7690 return true;
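/* Editorial illustration (not part of GCC): a sketch of how a caller
   might use vect_is_simple_use_1, following the contract documented
   above.  The function name and surrounding logic are hypothetical;
   only the called routines are taken from this file.  */

static bool
example_operand_vectype (tree op, gimple stmt, loop_vec_info loop_vinfo,
			 tree *vectype_out)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  tree vectype;

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, NULL, &def_stmt,
			     &def, &dt, &vectype))
    return false;

  /* For constant and external defs *VECTYPE comes back as NULL_TREE and
     the caller picks a vector type for the scalar operand itself.  */
  if (vectype == NULL_TREE)
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

  *vectype_out = vectype;
  return vectype != NULL_TREE;
}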
7694 /* Function supportable_widening_operation
7696 Check whether an operation represented by the code CODE is a
7697 widening operation that is supported by the target platform in
7698 vector form (i.e., when operating on arguments of type VECTYPE_IN
7699 producing a result of type VECTYPE_OUT).
7701 Widening operations we currently support are NOP (CONVERT), FLOAT,
7702 WIDEN_MULT and WIDEN_LSHIFT. This function checks whether these
7703 operations are supported by the target platform either directly
7704 (via vector tree-codes), or via target builtins.
7706 Output:
7707 - CODE1 and CODE2 are codes of vector operations to be used when
7708 vectorizing the operation, if available.
7709 - MULTI_STEP_CVT determines the number of required intermediate steps in
7710 case of multi-step conversion (like char->short->int - in that case
7711 MULTI_STEP_CVT will be 1).
7712 - INTERM_TYPES contains the intermediate type required to perform the
7713 widening operation (short in the above example). */
7715 bool
7716 supportable_widening_operation (enum tree_code code, gimple stmt,
7717 tree vectype_out, tree vectype_in,
7718 enum tree_code *code1, enum tree_code *code2,
7719 int *multi_step_cvt,
7720 vec<tree> *interm_types)
7722 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7723 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7724 struct loop *vect_loop = NULL;
7725 enum machine_mode vec_mode;
7726 enum insn_code icode1, icode2;
7727 optab optab1, optab2;
7728 tree vectype = vectype_in;
7729 tree wide_vectype = vectype_out;
7730 enum tree_code c1, c2;
7731 int i;
7732 tree prev_type, intermediate_type;
7733 enum machine_mode intermediate_mode, prev_mode;
7734 optab optab3, optab4;
7736 *multi_step_cvt = 0;
7737 if (loop_info)
7738 vect_loop = LOOP_VINFO_LOOP (loop_info);
7740 switch (code)
7742 case WIDEN_MULT_EXPR:
7743 /* The result of a vectorized widening operation usually requires
7744 two vectors (because the widened results do not fit into one vector).
7745 The generated vector results would normally be expected to be
7746 generated in the same order as in the original scalar computation,
7747 i.e. if 8 results are generated in each vector iteration, they are
7748 to be organized as follows:
7749 vect1: [res1,res2,res3,res4],
7750 vect2: [res5,res6,res7,res8].
7752 However, in the special case that the result of the widening
7753 operation is used in a reduction computation only, the order doesn't
7754 matter (because when vectorizing a reduction we change the order of
7755 the computation). Some targets can take advantage of this and
7756 generate more efficient code. For example, targets like Altivec,
7757 that support widen_mult using a sequence of {mult_even,mult_odd}
7758 generate the following vectors:
7759 vect1: [res1,res3,res5,res7],
7760 vect2: [res2,res4,res6,res8].
7762 When vectorizing outer loops, we execute the inner loop sequentially
7763 (each vectorized inner-loop iteration contributes to VF outer-loop
7764 iterations in parallel). We therefore do not allow the order of
7765 the computation in the inner loop to be changed during outer-loop
7766 vectorization. */
7767 /* TODO: Another case in which order doesn't *really* matter is when we
7768 widen and then contract again, e.g. (short)((int)x * y >> 8).
7769 Normally, pack_trunc performs an even/odd permute, whereas the
7770 repack from an even/odd expansion would be an interleave, which
7771 would be significantly simpler for e.g. AVX2. */
7772 /* In any case, in order to avoid duplicating the code below, recurse
7773 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7774 are properly set up for the caller. If we fail, we'll continue with
7775 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7776 if (vect_loop
7777 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7778 && !nested_in_vect_loop_p (vect_loop, stmt)
7779 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7780 stmt, vectype_out, vectype_in,
7781 code1, code2, multi_step_cvt,
7782 interm_types))
7783 return true;
7784 c1 = VEC_WIDEN_MULT_LO_EXPR;
7785 c2 = VEC_WIDEN_MULT_HI_EXPR;
7786 break;
7788 case VEC_WIDEN_MULT_EVEN_EXPR:
7789 /* Support the recursion induced just above. */
7790 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7791 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7792 break;
7794 case WIDEN_LSHIFT_EXPR:
7795 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7796 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7797 break;
7799 CASE_CONVERT:
7800 c1 = VEC_UNPACK_LO_EXPR;
7801 c2 = VEC_UNPACK_HI_EXPR;
7802 break;
7804 case FLOAT_EXPR:
7805 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7806 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7807 break;
7809 case FIX_TRUNC_EXPR:
7810 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7811 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7812 computing the operation. */
7813 return false;
7815 default:
7816 gcc_unreachable ();
7819 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7821 enum tree_code ctmp = c1;
7822 c1 = c2;
7823 c2 = ctmp;
7826 if (code == FIX_TRUNC_EXPR)
7828 /* The signedness is determined from the output operand. */
7829 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7830 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7832 else
7834 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7835 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7838 if (!optab1 || !optab2)
7839 return false;
7841 vec_mode = TYPE_MODE (vectype);
7842 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7843 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7844 return false;
7846 *code1 = c1;
7847 *code2 = c2;
7849 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7850 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7851 return true;
7853 /* Check if it's a multi-step conversion that can be done using intermediate
7854 types. */
7856 prev_type = vectype;
7857 prev_mode = vec_mode;
7859 if (!CONVERT_EXPR_CODE_P (code))
7860 return false;
7862 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7863 intermediate steps in the promotion sequence. We try
7864 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7865 not. */
7866 interm_types->create (MAX_INTERM_CVT_STEPS);
7867 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7869 intermediate_mode = insn_data[icode1].operand[0].mode;
7870 intermediate_type
7871 = lang_hooks.types.type_for_mode (intermediate_mode,
7872 TYPE_UNSIGNED (prev_type));
7873 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7874 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7876 if (!optab3 || !optab4
7877 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7878 || insn_data[icode1].operand[0].mode != intermediate_mode
7879 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7880 || insn_data[icode2].operand[0].mode != intermediate_mode
7881 || ((icode1 = optab_handler (optab3, intermediate_mode))
7882 == CODE_FOR_nothing)
7883 || ((icode2 = optab_handler (optab4, intermediate_mode))
7884 == CODE_FOR_nothing))
7885 break;
7887 interm_types->quick_push (intermediate_type);
7888 (*multi_step_cvt)++;
7890 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7891 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7892 return true;
7894 prev_type = intermediate_type;
7895 prev_mode = intermediate_mode;
7898 interm_types->release ();
7899 return false;
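/* Editorial illustration (not part of GCC): scalar reference code for
   the widening-multiply case above, ignoring the endianness-dependent
   LO/HI swap.  Each vector iteration produces two wide result groups:
   a LO/HI pair keeps the original order (results 0-3, then 4-7), while
   an EVEN/ODD pair produces results 0,2,4,6 and 1,3,5,7, which is
   acceptable when the results only feed a reduction.  The element count
   8 and the function name are illustrative only.  For a char->int
   widening the same machinery is applied twice (char->short, then
   short->int); MULTI_STEP_CVT and INTERM_TYPES describe that chain.  */

static void
example_widen_mult (const short *a, const short *b,
		    int *first_half, int *second_half, int use_even_odd)
{
  int i;
  for (i = 0; i < 4; i++)
    {
      if (use_even_odd)
	{
	  first_half[i] = (int) a[2 * i] * b[2 * i];	       /* results 0,2,4,6 */
	  second_half[i] = (int) a[2 * i + 1] * b[2 * i + 1];  /* results 1,3,5,7 */
	}
      else
	{
	  first_half[i] = (int) a[i] * b[i];		       /* results 0..3 */
	  second_half[i] = (int) a[i + 4] * b[i + 4];	       /* results 4..7 */
	}
    }
}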
7903 /* Function supportable_narrowing_operation
7905 Check whether an operation represented by the code CODE is a
7906 narrowing operation that is supported by the target platform in
7907 vector form (i.e., when operating on arguments of type VECTYPE_IN
7908 and producing a result of type VECTYPE_OUT).
7910 Narrowing operations we currently support are NOP (CONVERT) and
7911 FIX_TRUNC. This function checks if these operations are supported by
7912 the target platform directly via vector tree-codes.
7914 Output:
7915 - CODE1 is the code of a vector operation to be used when
7916 vectorizing the operation, if available.
7917 - MULTI_STEP_CVT determines the number of required intermediate steps in
7918 case of multi-step conversion (like int->short->char - in that case
7919 MULTI_STEP_CVT will be 1).
7920 - INTERM_TYPES contains the intermediate type required to perform the
7921 narrowing operation (short in the above example). */
7923 bool
7924 supportable_narrowing_operation (enum tree_code code,
7925 tree vectype_out, tree vectype_in,
7926 enum tree_code *code1, int *multi_step_cvt,
7927 vec<tree> *interm_types)
7929 enum machine_mode vec_mode;
7930 enum insn_code icode1;
7931 optab optab1, interm_optab;
7932 tree vectype = vectype_in;
7933 tree narrow_vectype = vectype_out;
7934 enum tree_code c1;
7935 tree intermediate_type;
7936 enum machine_mode intermediate_mode, prev_mode;
7937 int i;
7938 bool uns;
7940 *multi_step_cvt = 0;
7941 switch (code)
7943 CASE_CONVERT:
7944 c1 = VEC_PACK_TRUNC_EXPR;
7945 break;
7947 case FIX_TRUNC_EXPR:
7948 c1 = VEC_PACK_FIX_TRUNC_EXPR;
7949 break;
7951 case FLOAT_EXPR:
7952 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
7953 tree code and optabs used for computing the operation. */
7954 return false;
7956 default:
7957 gcc_unreachable ();
7960 if (code == FIX_TRUNC_EXPR)
7961 /* The signedness is determined from the output operand. */
7962 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7963 else
7964 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7966 if (!optab1)
7967 return false;
7969 vec_mode = TYPE_MODE (vectype);
7970 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
7971 return false;
7973 *code1 = c1;
7975 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
7976 return true;
7978 /* Check if it's a multi-step conversion that can be done using intermediate
7979 types. */
7980 prev_mode = vec_mode;
7981 if (code == FIX_TRUNC_EXPR)
7982 uns = TYPE_UNSIGNED (vectype_out);
7983 else
7984 uns = TYPE_UNSIGNED (vectype);
7986 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
7987 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
7988 costly than signed. */
7989 if (code == FIX_TRUNC_EXPR && uns)
7991 enum insn_code icode2;
7993 intermediate_type
7994 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
7995 interm_optab
7996 = optab_for_tree_code (c1, intermediate_type, optab_default);
7997 if (interm_optab != unknown_optab
7998 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
7999 && insn_data[icode1].operand[0].mode
8000 == insn_data[icode2].operand[0].mode)
8002 uns = false;
8003 optab1 = interm_optab;
8004 icode1 = icode2;
8008 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8009 intermediate steps in the demotion (narrowing) sequence. We try
8010 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8011 interm_types->create (MAX_INTERM_CVT_STEPS);
8012 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8014 intermediate_mode = insn_data[icode1].operand[0].mode;
8015 intermediate_type
8016 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8017 interm_optab
8018 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8019 optab_default);
8020 if (!interm_optab
8021 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8022 || insn_data[icode1].operand[0].mode != intermediate_mode
8023 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8024 == CODE_FOR_nothing))
8025 break;
8027 interm_types->quick_push (intermediate_type);
8028 (*multi_step_cvt)++;
8030 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8031 return true;
8033 prev_mode = intermediate_mode;
8034 optab1 = interm_optab;
8037 interm_types->release ();
8038 return false;
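/* Editorial illustration (not part of GCC): scalar reference code for
   the pack-truncate operation selected above.  One narrowing step
   consumes two input vectors and produces a single vector whose
   elements are half as wide; an int->char conversion therefore needs an
   intermediate short step, which is what MULTI_STEP_CVT == 1 and
   INTERM_TYPES == {short-vector} describe in the comment above.  The
   vector length 4 and the function name are illustrative only.  */

static void
example_pack_trunc (const int *in0, const int *in1, short *out)
{
  int i;
  for (i = 0; i < 4; i++)
    {
      out[i] = (short) in0[i];	    /* elements from the first input  */
      out[i + 4] = (short) in1[i];  /* elements from the second input */
    }
}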