Merge aosp-toolchain/gcc/gcc-4_9 changes.
official-gcc.git / gcc-4_9 / gcc / tree-vect-stmts.c (blob 7d5366136559a82ff495b5ebb02c3eda3b86a072)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "tree-eh.h"
35 #include "gimple-expr.h"
36 #include "is-a.h"
37 #include "gimple.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-ssa.h"
42 #include "tree-cfg.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-manip.h"
48 #include "cfgloop.h"
49 #include "tree-ssa-loop.h"
50 #include "tree-scalar-evolution.h"
51 #include "expr.h"
52 #include "recog.h" /* FIXME: for insn_data */
53 #include "optabs.h"
54 #include "diagnostic-core.h"
55 #include "tree-vectorizer.h"
56 #include "dumpfile.h"
57 #include "cgraph.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (struct _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
75 gimple stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 struct loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if (body_cost_vec)
99 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
100 add_stmt_info_to_vec (body_cost_vec, count, kind,
101 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
102 misalign);
103 return (unsigned)
104 (builtin_vectorization_cost (kind, vectype, misalign) * count);
107 else
109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
111 void *target_cost_data;
113 if (loop_vinfo)
114 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
115 else
116 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
118 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
119 misalign, where);
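/* For example (an illustrative sketch, not a call made here; BODY_COST_VEC
   and STMT_INFO are the caller's own), costing one vector statement in the
   loop body during analysis would typically look like:

     unsigned est = record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                      stmt_info, 0, vect_body);

   With a non-NULL BODY_COST_VEC the cost is merely recorded for later
   processing and EST is only a preliminary estimate; with a NULL vector the
   target cost model is informed immediately through add_stmt_cost.  */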
123 /* Return a variable of type ELEM_TYPE[NELEMS]. */
125 static tree
126 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
128 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
129 "vect_array");
132 /* ARRAY is an array of vectors created by create_vector_array.
133 Return an SSA_NAME for the vector in index N. The reference
134 is part of the vectorization of STMT and the vector is associated
135 with scalar destination SCALAR_DEST. */
137 static tree
138 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
139 tree array, unsigned HOST_WIDE_INT n)
141 tree vect_type, vect, vect_name, array_ref;
142 gimple new_stmt;
144 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
145 vect_type = TREE_TYPE (TREE_TYPE (array));
146 vect = vect_create_destination_var (scalar_dest, vect_type);
147 array_ref = build4 (ARRAY_REF, vect_type, array,
148 build_int_cst (size_type_node, n),
149 NULL_TREE, NULL_TREE);
151 new_stmt = gimple_build_assign (vect, array_ref);
152 vect_name = make_ssa_name (vect, new_stmt);
153 gimple_assign_set_lhs (new_stmt, vect_name);
154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
156 return vect_name;
159 /* ARRAY is an array of vectors created by create_vector_array.
160 Emit code to store SSA_NAME VECT in index N of the array.
161 The store is part of the vectorization of STMT. */
163 static void
164 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
165 tree array, unsigned HOST_WIDE_INT n)
167 tree array_ref;
168 gimple new_stmt;
170 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (array_ref, vect);
175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
178 /* PTR is a pointer to an array of type TYPE. Return a representation
179 of *PTR. The memory reference replaces those in FIRST_DR
180 (and its group). */
182 static tree
183 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
185 tree mem_ref, alias_ptr_type;
187 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
188 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
189 /* Arrays have the same alignment as their type. */
190 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
191 return mem_ref;
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
200 static void
201 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
202 enum vect_relevant relevant, bool live_p,
203 bool used_in_pattern)
205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208 gimple pattern_stmt;
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "mark relevant %d, live %d.\n", relevant, live_p);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 bool found = false;
221 if (!used_in_pattern)
223 imm_use_iterator imm_iter;
224 use_operand_p use_p;
225 gimple use_stmt;
226 tree lhs;
227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
228 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 if (is_gimple_assign (stmt))
231 lhs = gimple_assign_lhs (stmt);
232 else
233 lhs = gimple_call_lhs (stmt);
235 /* This use is outside the pattern; if LHS has other uses that are
236 pattern uses, we should mark the stmt itself, and not the pattern
237 stmt. */
238 if (lhs && TREE_CODE (lhs) == SSA_NAME)
239 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
241 if (is_gimple_debug (USE_STMT (use_p)))
242 continue;
243 use_stmt = USE_STMT (use_p);
245 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
246 continue;
248 if (vinfo_for_stmt (use_stmt)
249 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
251 found = true;
252 break;
257 if (!found)
259 /* This is the last stmt in a sequence that was detected as a
260 pattern that can potentially be vectorized. Don't mark the stmt
261 as relevant/live because it's not going to be vectorized.
262 Instead mark the pattern-stmt that replaces it. */
264 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_NOTE, vect_location,
268 "last stmt in pattern. don't mark"
269 " relevant/live.\n");
270 stmt_info = vinfo_for_stmt (pattern_stmt);
271 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
272 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
273 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
274 stmt = pattern_stmt;
278 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
279 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
280 STMT_VINFO_RELEVANT (stmt_info) = relevant;
282 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
283 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
285 if (dump_enabled_p ())
286 dump_printf_loc (MSG_NOTE, vect_location,
287 "already marked relevant/live.\n");
288 return;
291 worklist->safe_push (stmt);
295 /* Function vect_stmt_relevant_p.
297 Return true if STMT, in the loop that is represented by LOOP_VINFO, is
298 "relevant for vectorization".
300 A stmt is considered "relevant for vectorization" if:
301 - it has uses outside the loop.
302 - it has vdefs (it alters memory).
303 - it is a control stmt in the loop (other than the exit condition).
305 CHECKME: what other side effects would the vectorizer allow? */
307 static bool
308 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
309 enum vect_relevant *relevant, bool *live_p)
311 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
312 ssa_op_iter op_iter;
313 imm_use_iterator imm_iter;
314 use_operand_p use_p;
315 def_operand_p def_p;
317 *relevant = vect_unused_in_scope;
318 *live_p = false;
320 /* cond stmt other than loop exit cond. */
321 if (is_ctrl_stmt (stmt)
322 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
323 != loop_exit_ctrl_vec_info_type)
324 *relevant = vect_used_in_scope;
326 /* changing memory. */
327 if (gimple_code (stmt) != GIMPLE_PHI)
328 if (gimple_vdef (stmt)
329 && !gimple_clobber_p (stmt))
331 if (dump_enabled_p ())
332 dump_printf_loc (MSG_NOTE, vect_location,
333 "vec_stmt_relevant_p: stmt has vdefs.\n");
334 *relevant = vect_used_in_scope;
337 /* uses outside the loop. */
338 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
340 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
342 basic_block bb = gimple_bb (USE_STMT (use_p));
343 if (!flow_bb_inside_loop_p (loop, bb))
345 if (dump_enabled_p ())
346 dump_printf_loc (MSG_NOTE, vect_location,
347 "vec_stmt_relevant_p: used out of loop.\n");
349 if (is_gimple_debug (USE_STMT (use_p)))
350 continue;
352 /* We expect all such uses to be in the loop exit phis
353 (because of loop closed form) */
354 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
355 gcc_assert (bb == single_exit (loop)->dest);
357 *live_p = true;
362 return (*live_p || *relevant);
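/* For example (illustrative only), in

     for (i = 0; i < n; i++)
       {
         s_1 = a[i] + 1;
         b[i] = a[i] * 2;
       }
     ... = s_1;

   the store to b[i] is relevant because it has a vdef (it alters memory),
   and the def s_1 is live because it is used after the loop, through the
   loop-closed exit phi.  A statement used only to compute array indices
   has none of the properties above and is neither relevant nor live here.  */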
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
371 static bool
372 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
383 /* STMT has a data_ref. FORNOW this means that it is one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
391 for array indexing.
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt))
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
415 return false;
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
424 if (operand == use)
425 return true;
427 return false;
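/* For example (illustrative only), given the store "*p_1 = x_2", which is a
   gimple copy whose LHS is not an SSA_NAME: for USE == x_2 the function
   returns true (x_2 is the stored value, not an index), while for
   USE == p_1 it returns false, since p_1 only participates in the address
   computation.  */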
432 Function process_use.
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT because it has already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
458 static bool
459 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
460 enum vect_relevant relevant, vec<gimple> *worklist,
461 bool force)
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 tree def;
468 gimple def_stmt;
469 enum vect_def_type dt;
471 /* case 1: we are only interested in uses that need to be vectorized. Uses
472 that are used for address computation are not considered relevant. */
473 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
474 return true;
476 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
478 if (dump_enabled_p ())
479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
480 "not vectorized: unsupported use in stmt.\n");
481 return false;
484 if (!def_stmt || gimple_nop_p (def_stmt))
485 return true;
487 def_bb = gimple_bb (def_stmt);
488 if (!flow_bb_inside_loop_p (loop, def_bb))
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
492 return true;
495 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
496 DEF_STMT must have already been processed, because this should be the
497 only way that STMT, which is a reduction-phi, was put in the worklist,
498 as there should be no other uses for DEF_STMT in the loop. So we just
499 check that everything is as expected, and we are done. */
500 dstmt_vinfo = vinfo_for_stmt (def_stmt);
501 bb = gimple_bb (stmt);
502 if (gimple_code (stmt) == GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
504 && gimple_code (def_stmt) != GIMPLE_PHI
505 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
506 && bb->loop_father == def_bb->loop_father)
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "reduc-stmt defining reduc-phi in the same nest.\n");
511 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
512 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
513 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
514 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
515 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
516 return true;
519 /* case 3a: outer-loop stmt defining an inner-loop stmt:
520 outer-loop-header-bb:
521 d = def_stmt
522 inner-loop:
523 stmt # use (d)
524 outer-loop-tail-bb:
525 ... */
526 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
528 if (dump_enabled_p ())
529 dump_printf_loc (MSG_NOTE, vect_location,
530 "outer-loop def-stmt defining inner-loop stmt.\n");
532 switch (relevant)
534 case vect_unused_in_scope:
535 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
536 vect_used_in_scope : vect_unused_in_scope;
537 break;
539 case vect_used_in_outer_by_reduction:
540 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
541 relevant = vect_used_by_reduction;
542 break;
544 case vect_used_in_outer:
545 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
546 relevant = vect_used_in_scope;
547 break;
549 case vect_used_in_scope:
550 break;
552 default:
553 gcc_unreachable ();
557 /* case 3b: inner-loop stmt defining an outer-loop stmt:
558 outer-loop-header-bb:
560 inner-loop:
561 d = def_stmt
562 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
563 stmt # use (d) */
564 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
566 if (dump_enabled_p ())
567 dump_printf_loc (MSG_NOTE, vect_location,
568 "inner-loop def-stmt defining outer-loop stmt.\n");
570 switch (relevant)
572 case vect_unused_in_scope:
573 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
574 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
575 vect_used_in_outer_by_reduction : vect_unused_in_scope;
576 break;
578 case vect_used_by_reduction:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
586 default:
587 gcc_unreachable ();
591 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
592 is_pattern_stmt_p (stmt_vinfo));
593 return true;
597 /* Function vect_mark_stmts_to_be_vectorized.
599 Not all stmts in the loop need to be vectorized. For example:
601 for i...
602 for j...
603 1. T0 = i + j
604 2. T1 = a[T0]
606 3. j = j + 1
608 Stmts 1 and 3 do not need to be vectorized, because loop control and
609 addressing of vectorized data-refs are handled differently.
611 This pass detects such stmts. */
613 bool
614 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
616 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
617 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
618 unsigned int nbbs = loop->num_nodes;
619 gimple_stmt_iterator si;
620 gimple stmt;
621 unsigned int i;
622 stmt_vec_info stmt_vinfo;
623 basic_block bb;
624 gimple phi;
625 bool live_p;
626 enum vect_relevant relevant, tmp_relevant;
627 enum vect_def_type def_type;
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "=== vect_mark_stmts_to_be_vectorized ===\n");
633 auto_vec<gimple, 64> worklist;
635 /* 1. Init worklist. */
636 for (i = 0; i < nbbs; i++)
638 bb = bbs[i];
639 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
641 phi = gsi_stmt (si);
642 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
645 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
646 dump_printf (MSG_NOTE, "\n");
649 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
650 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
652 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
654 stmt = gsi_stmt (si);
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
659 dump_printf (MSG_NOTE, "\n");
662 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
667 /* 2. Process_worklist */
668 while (worklist.length () > 0)
670 use_operand_p use_p;
671 ssa_op_iter iter;
673 stmt = worklist.pop ();
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
678 dump_printf (MSG_NOTE, "\n");
681 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
682 (DEF_STMT) as relevant/irrelevant and live/dead according to the
683 liveness and relevance properties of STMT. */
684 stmt_vinfo = vinfo_for_stmt (stmt);
685 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
686 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
688 /* Generally, the liveness and relevance properties of STMT are
689 propagated as is to the DEF_STMTs of its USEs:
690 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
691 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
693 One exception is when STMT has been identified as defining a reduction
694 variable; in this case we set the liveness/relevance as follows:
695 live_p = false
696 relevant = vect_used_by_reduction
697 This is because we distinguish between two kinds of relevant stmts -
698 those that are used by a reduction computation, and those that are
699 (also) used by a regular computation. This allows us later on to
700 identify stmts that are used solely by a reduction, and therefore the
701 order of the results that they produce does not have to be kept. */
703 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
704 tmp_relevant = relevant;
705 switch (def_type)
707 case vect_reduction_def:
708 switch (tmp_relevant)
710 case vect_unused_in_scope:
711 relevant = vect_used_by_reduction;
712 break;
714 case vect_used_by_reduction:
715 if (gimple_code (stmt) == GIMPLE_PHI)
716 break;
717 /* fall through */
719 default:
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "unsupported use of reduction.\n");
723 return false;
726 live_p = false;
727 break;
729 case vect_nested_cycle:
730 if (tmp_relevant != vect_unused_in_scope
731 && tmp_relevant != vect_used_in_outer_by_reduction
732 && tmp_relevant != vect_used_in_outer)
734 if (dump_enabled_p ())
735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
736 "unsupported use of nested cycle.\n");
738 return false;
741 live_p = false;
742 break;
744 case vect_double_reduction_def:
745 if (tmp_relevant != vect_unused_in_scope
746 && tmp_relevant != vect_used_by_reduction)
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
750 "unsupported use of double reduction.\n");
752 return false;
755 live_p = false;
756 break;
758 default:
759 break;
762 if (is_pattern_stmt_p (stmt_vinfo))
764 /* Pattern statements are not inserted into the code, so
765 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
766 have to scan the RHS or function arguments instead. */
767 if (is_gimple_assign (stmt))
769 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
770 tree op = gimple_assign_rhs1 (stmt);
772 i = 1;
773 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
775 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
776 live_p, relevant, &worklist, false)
777 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
778 live_p, relevant, &worklist, false))
779 return false;
780 i = 2;
782 for (; i < gimple_num_ops (stmt); i++)
784 op = gimple_op (stmt, i);
785 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
786 &worklist, false))
787 return false;
790 else if (is_gimple_call (stmt))
792 for (i = 0; i < gimple_call_num_args (stmt); i++)
794 tree arg = gimple_call_arg (stmt, i);
795 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
796 &worklist, false))
797 return false;
801 else
802 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
804 tree op = USE_FROM_PTR (use_p);
805 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
806 &worklist, false))
807 return false;
810 if (STMT_VINFO_GATHER_P (stmt_vinfo))
812 tree off;
813 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
814 gcc_assert (decl);
815 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
816 &worklist, true))
817 return false;
819 } /* while worklist */
821 return true;
825 /* Function vect_model_simple_cost.
827 Models cost for simple operations, i.e. those that only emit ncopies of a
828 single op. Right now, this does not account for multiple insns that could
829 be generated for the single vector op. We will handle that shortly. */
831 void
832 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 stmt_vector_for_cost *prologue_cost_vec,
835 stmt_vector_for_cost *body_cost_vec)
837 int i;
838 int inside_cost = 0, prologue_cost = 0;
840 /* The SLP costs were already calculated during SLP tree build. */
841 if (PURE_SLP_STMT (stmt_info))
842 return;
844 /* FORNOW: Assuming maximum 2 args per stmt. */
845 for (i = 0; i < 2; i++)
846 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
847 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
848 stmt_info, 0, vect_prologue);
850 /* Pass the inside-of-loop statements to the target-specific cost model. */
851 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
852 stmt_info, 0, vect_body);
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_NOTE, vect_location,
856 "vect_model_simple_cost: inside_cost = %d, "
857 "prologue_cost = %d .\n", inside_cost, prologue_cost);
861 /* Model cost for type demotion and promotion operations. PWR is normally
862 zero for single-step promotions and demotions. It will be one if
863 two-step promotion/demotion is required, and so on. Each additional
864 step doubles the number of instructions required. */
866 static void
867 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
868 enum vect_def_type *dt, int pwr)
870 int i, tmp;
871 int inside_cost = 0, prologue_cost = 0;
872 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
873 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
874 void *target_cost_data;
876 /* The SLP costs were already calculated during SLP tree build. */
877 if (PURE_SLP_STMT (stmt_info))
878 return;
880 if (loop_vinfo)
881 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
882 else
883 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
885 for (i = 0; i < pwr + 1; i++)
887 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
888 (i + 1) : i;
889 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
890 vec_promote_demote, stmt_info, 0,
891 vect_body);
894 /* FORNOW: Assuming maximum 2 args per stmt. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
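/* Worked example of the loop above (illustrative, taking vect_pow2 (N) as
   2**N): for a two-step promotion, PWR is 1 and STMT_VINFO_TYPE is
   type_promotion_vec_info_type, so the iterations add
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote statements
   to the body cost.  For a two-step demotion with PWR == 1 the sum is
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3 statements.  */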
906 /* Function vect_cost_group_size
908 For grouped load or store, return the group_size only if it is the first
909 load or store of a group, else return 1. This ensures that group size is
910 only returned once per group. */
912 static int
913 vect_cost_group_size (stmt_vec_info stmt_info)
915 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
917 if (first_stmt == STMT_VINFO_STMT (stmt_info))
918 return GROUP_SIZE (stmt_info);
920 return 1;
924 /* Function vect_model_store_cost
926 Models cost for stores. In the case of grouped accesses, one access
927 has the overhead of the grouped access attributed to it. */
929 void
930 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
931 bool store_lanes_p, enum vect_def_type dt,
932 slp_tree slp_node,
933 stmt_vector_for_cost *prologue_cost_vec,
934 stmt_vector_for_cost *body_cost_vec)
936 int group_size;
937 unsigned int inside_cost = 0, prologue_cost = 0;
938 struct data_reference *first_dr;
939 gimple first_stmt;
941 /* The SLP costs were already calculated during SLP tree build. */
942 if (PURE_SLP_STMT (stmt_info))
943 return;
945 if (dt == vect_constant_def || dt == vect_external_def)
946 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
947 stmt_info, 0, vect_prologue);
949 /* Grouped access? */
950 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
952 if (slp_node)
954 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
955 group_size = 1;
957 else
959 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
960 group_size = vect_cost_group_size (stmt_info);
963 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
965 /* Not a grouped access. */
966 else
968 group_size = 1;
969 first_dr = STMT_VINFO_DATA_REF (stmt_info);
972 /* We assume that the cost of a single store-lanes instruction is
973 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
974 access is instead being provided by a permute-and-store operation,
975 include the cost of the permutes. */
976 if (!store_lanes_p && group_size > 1)
978 /* Uses high and low interleave, or shuffle operations, for each
979 needed permute. */
980 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
981 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
982 stmt_info, 0, vect_body);
984 if (dump_enabled_p ())
985 dump_printf_loc (MSG_NOTE, vect_location,
986 "vect_model_store_cost: strided group_size = %d .\n",
987 group_size);
990 /* Costs of the stores. */
991 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_NOTE, vect_location,
995 "vect_model_store_cost: inside_cost = %d, "
996 "prologue_cost = %d .\n", inside_cost, prologue_cost);
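/* Worked example of the interleaving cost above (illustrative): for a
   grouped store with GROUP_SIZE == 4 that does not use store-lanes and has
   NCOPIES == 1, the permute count is
   ncopies * ceil_log2 (group_size) * group_size = 1 * 2 * 4 = 8
   vec_perm statements, charged to the loop body in addition to the stores
   themselves.  */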
1000 /* Calculate cost of DR's memory access. */
1001 void
1002 vect_get_store_cost (struct data_reference *dr, int ncopies,
1003 unsigned int *inside_cost,
1004 stmt_vector_for_cost *body_cost_vec)
1006 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1007 gimple stmt = DR_STMT (dr);
1008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1010 switch (alignment_support_scheme)
1012 case dr_aligned:
1014 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1015 vector_store, stmt_info, 0,
1016 vect_body);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: aligned.\n");
1021 break;
1024 case dr_unaligned_supported:
1026 /* Here, we assign an additional cost for the unaligned store. */
1027 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1028 unaligned_store, stmt_info,
1029 DR_MISALIGNMENT (dr), vect_body);
1030 if (dump_enabled_p ())
1031 dump_printf_loc (MSG_NOTE, vect_location,
1032 "vect_model_store_cost: unaligned supported by "
1033 "hardware.\n");
1034 break;
1037 case dr_unaligned_unsupported:
1039 *inside_cost = VECT_MAX_COST;
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1043 "vect_model_store_cost: unsupported access.\n");
1044 break;
1047 default:
1048 gcc_unreachable ();
1053 /* Function vect_model_load_cost
1055 Models cost for loads. In the case of grouped accesses, the last access
1056 has the overhead of the grouped access attributed to it. Since unaligned
1057 accesses are supported for loads, we also account for the costs of the
1058 access scheme chosen. */
1060 void
1061 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1062 bool load_lanes_p, slp_tree slp_node,
1063 stmt_vector_for_cost *prologue_cost_vec,
1064 stmt_vector_for_cost *body_cost_vec)
1066 int group_size;
1067 gimple first_stmt;
1068 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1069 unsigned int inside_cost = 0, prologue_cost = 0;
1071 /* The SLP costs were already calculated during SLP tree build. */
1072 if (PURE_SLP_STMT (stmt_info))
1073 return;
1075 /* Grouped accesses? */
1076 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1077 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1079 group_size = vect_cost_group_size (stmt_info);
1080 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1082 /* Not a grouped access. */
1083 else
1085 group_size = 1;
1086 first_dr = dr;
1089 /* We assume that the cost of a single load-lanes instruction is
1090 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1091 access is instead being provided by a load-and-permute operation,
1092 include the cost of the permutes. */
1093 if (!load_lanes_p && group_size > 1)
1095 /* Uses even and odd extract operations, or shuffle operations,
1096 for each needed permute. */
1097 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1098 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1099 stmt_info, 0, vect_body);
1101 if (dump_enabled_p ())
1102 dump_printf_loc (MSG_NOTE, vect_location,
1103 "vect_model_load_cost: strided group_size = %d .\n",
1104 group_size);
1107 /* The loads themselves. */
1108 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1110 /* N scalar loads plus gathering them into a vector. */
1111 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1112 inside_cost += record_stmt_cost (body_cost_vec,
1113 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1114 scalar_load, stmt_info, 0, vect_body);
1115 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1116 stmt_info, 0, vect_body);
1118 else
1119 vect_get_load_cost (first_dr, ncopies,
1120 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1121 || group_size > 1 || slp_node),
1122 &inside_cost, &prologue_cost,
1123 prologue_cost_vec, body_cost_vec, true);
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_NOTE, vect_location,
1127 "vect_model_load_cost: inside_cost = %d, "
1128 "prologue_cost = %d .\n", inside_cost, prologue_cost);
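/* Worked example (illustrative): for a strided load with a four-element
   vector type and NCOPIES == 2, the code above charges
   ncopies * TYPE_VECTOR_SUBPARTS (vectype) = 8 scalar_load statements plus
   2 vec_construct statements to the loop body.  A grouped, non-load-lanes
   access with GROUP_SIZE == 4 and NCOPIES == 1 would instead add
   1 * ceil_log2 (4) * 4 = 8 vec_perm statements on top of the vector loads
   costed by vect_get_load_cost.  */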
1132 /* Calculate cost of DR's memory access. */
1133 void
1134 vect_get_load_cost (struct data_reference *dr, int ncopies,
1135 bool add_realign_cost, unsigned int *inside_cost,
1136 unsigned int *prologue_cost,
1137 stmt_vector_for_cost *prologue_cost_vec,
1138 stmt_vector_for_cost *body_cost_vec,
1139 bool record_prologue_costs)
1141 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1142 gimple stmt = DR_STMT (dr);
1143 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1145 switch (alignment_support_scheme)
1147 case dr_aligned:
1149 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1150 stmt_info, 0, vect_body);
1152 if (dump_enabled_p ())
1153 dump_printf_loc (MSG_NOTE, vect_location,
1154 "vect_model_load_cost: aligned.\n");
1156 break;
1158 case dr_unaligned_supported:
1160 /* Here, we assign an additional cost for the unaligned load. */
1161 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1162 unaligned_load, stmt_info,
1163 DR_MISALIGNMENT (dr), vect_body);
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: unaligned supported by "
1168 "hardware.\n");
1170 break;
1172 case dr_explicit_realign:
1174 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1175 vector_load, stmt_info, 0, vect_body);
1176 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1177 vec_perm, stmt_info, 0, vect_body);
1179 /* FIXME: If the misalignment remains fixed across the iterations of
1180 the containing loop, the following cost should be added to the
1181 prologue costs. */
1182 if (targetm.vectorize.builtin_mask_for_load)
1183 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1184 stmt_info, 0, vect_body);
1186 if (dump_enabled_p ())
1187 dump_printf_loc (MSG_NOTE, vect_location,
1188 "vect_model_load_cost: explicit realign\n");
1190 break;
1192 case dr_explicit_realign_optimized:
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_NOTE, vect_location,
1196 "vect_model_load_cost: unaligned software "
1197 "pipelined.\n");
1199 /* Unaligned software pipeline has a load of an address, an initial
1200 load, and possibly a mask operation to "prime" the loop. However,
1201 if this is an access in a group of loads, which provide grouped
1202 access, then the above cost should only be considered for one
1203 access in the group. Inside the loop, there is a load op
1204 and a realignment op. */
1206 if (add_realign_cost && record_prologue_costs)
1208 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1209 vector_stmt, stmt_info,
1210 0, vect_prologue);
1211 if (targetm.vectorize.builtin_mask_for_load)
1212 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1213 vector_stmt, stmt_info,
1214 0, vect_prologue);
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1218 stmt_info, 0, vect_body);
1219 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1220 stmt_info, 0, vect_body);
1222 if (dump_enabled_p ())
1223 dump_printf_loc (MSG_NOTE, vect_location,
1224 "vect_model_load_cost: explicit realign optimized"
1225 "\n");
1227 break;
1230 case dr_unaligned_unsupported:
1232 *inside_cost = VECT_MAX_COST;
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1236 "vect_model_load_cost: unsupported access.\n");
1237 break;
1240 default:
1241 gcc_unreachable ();
1245 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1246 the loop preheader for the vectorized stmt STMT. */
1248 static void
1249 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1251 if (gsi)
1252 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1253 else
1255 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1256 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1258 if (loop_vinfo)
1260 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1261 basic_block new_bb;
1262 edge pe;
1264 if (nested_in_vect_loop_p (loop, stmt))
1265 loop = loop->inner;
1267 pe = loop_preheader_edge (loop);
1268 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1269 gcc_assert (!new_bb);
1271 else
1273 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1274 basic_block bb;
1275 gimple_stmt_iterator gsi_bb_start;
1277 gcc_assert (bb_vinfo);
1278 bb = BB_VINFO_BB (bb_vinfo);
1279 gsi_bb_start = gsi_after_labels (bb);
1280 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1284 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "created new init_stmt: ");
1288 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1289 dump_printf (MSG_NOTE, "\n");
1293 /* Function vect_init_vector.
1295 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1296 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1297 a vector type, a vector with all elements equal to VAL is created first.
1298 Place the initialization at GSI if it is not NULL. Otherwise, place the
1299 initialization at the loop preheader.
1300 Return the DEF of INIT_STMT.
1301 It will be used in the vectorization of STMT. */
1303 tree
1304 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1306 tree new_var;
1307 gimple init_stmt;
1308 tree vec_oprnd;
1309 tree new_temp;
1311 if (TREE_CODE (type) == VECTOR_TYPE
1312 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1314 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1316 if (CONSTANT_CLASS_P (val))
1317 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1318 else
1320 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1321 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1322 new_temp, val,
1323 NULL_TREE);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1328 val = build_vector_from_val (type, val);
1331 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_var, val);
1333 new_temp = make_ssa_name (new_var, init_stmt);
1334 gimple_assign_set_lhs (init_stmt, new_temp);
1335 vect_init_vector_1 (stmt, init_stmt, gsi);
1336 vec_oprnd = gimple_assign_lhs (init_stmt);
1337 return vec_oprnd;
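/* For example (a sketch; STMT and the types are stand-ins), splatting the
   scalar constant 5 into a vector of ints:

     tree vectype = get_vectype_for_scalar_type (integer_type_node);
     tree vec_cst = vect_init_vector (stmt, build_int_cst (integer_type_node, 5),
                                      vectype, NULL);

   builds 'cst_ = { 5, 5, ..., 5 }', inserts it in the loop preheader
   (because GSI is NULL), and returns the SSA name of the new vector.  */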
1341 /* Function vect_get_vec_def_for_operand.
1343 OP is an operand in STMT. This function returns a (vector) def that will be
1344 used in the vectorized stmt for STMT.
1346 In the case that OP is an SSA_NAME which is defined in the loop, then
1347 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1349 In case OP is an invariant or constant, a new stmt that creates a vector def
1350 needs to be introduced. */
1352 tree
1353 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1355 tree vec_oprnd;
1356 gimple vec_stmt;
1357 gimple def_stmt;
1358 stmt_vec_info def_stmt_info = NULL;
1359 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1360 unsigned int nunits;
1361 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1362 tree def;
1363 enum vect_def_type dt;
1364 bool is_simple_use;
1365 tree vector_type;
1367 if (dump_enabled_p ())
1369 dump_printf_loc (MSG_NOTE, vect_location,
1370 "vect_get_vec_def_for_operand: ");
1371 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1372 dump_printf (MSG_NOTE, "\n");
1375 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1376 &def_stmt, &def, &dt);
1377 gcc_assert (is_simple_use);
1378 if (dump_enabled_p ())
1380 int loc_printed = 0;
1381 if (def)
1383 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1384 loc_printed = 1;
1385 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1386 dump_printf (MSG_NOTE, "\n");
1388 if (def_stmt)
1390 if (loc_printed)
1391 dump_printf (MSG_NOTE, " def_stmt = ");
1392 else
1393 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1394 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1395 dump_printf (MSG_NOTE, "\n");
1399 switch (dt)
1401 /* Case 1: operand is a constant. */
1402 case vect_constant_def:
1404 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1405 gcc_assert (vector_type);
1406 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1408 if (scalar_def)
1409 *scalar_def = op;
1411 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1412 if (dump_enabled_p ())
1413 dump_printf_loc (MSG_NOTE, vect_location,
1414 "Create vector_cst. nunits = %d\n", nunits);
1416 return vect_init_vector (stmt, op, vector_type, NULL);
1419 /* Case 2: operand is defined outside the loop - loop invariant. */
1420 case vect_external_def:
1422 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1423 gcc_assert (vector_type);
1425 if (scalar_def)
1426 *scalar_def = def;
1428 /* Create 'vec_inv = {inv,inv,..,inv}' */
1429 if (dump_enabled_p ())
1430 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1432 return vect_init_vector (stmt, def, vector_type, NULL);
1435 /* Case 3: operand is defined inside the loop. */
1436 case vect_internal_def:
1438 if (scalar_def)
1439 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1441 /* Get the def from the vectorized stmt. */
1442 def_stmt_info = vinfo_for_stmt (def_stmt);
1444 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1445 /* Get vectorized pattern statement. */
1446 if (!vec_stmt
1447 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1448 && !STMT_VINFO_RELEVANT (def_stmt_info))
1449 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1450 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1451 gcc_assert (vec_stmt);
1452 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1453 vec_oprnd = PHI_RESULT (vec_stmt);
1454 else if (is_gimple_call (vec_stmt))
1455 vec_oprnd = gimple_call_lhs (vec_stmt);
1456 else
1457 vec_oprnd = gimple_assign_lhs (vec_stmt);
1458 return vec_oprnd;
1461 /* Case 4: operand is defined by a loop header phi - reduction */
1462 case vect_reduction_def:
1463 case vect_double_reduction_def:
1464 case vect_nested_cycle:
1466 struct loop *loop;
1468 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1469 loop = (gimple_bb (def_stmt))->loop_father;
1471 /* Get the def before the loop */
1472 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1473 return get_initial_def_for_reduction (stmt, op, scalar_def);
1476 /* Case 5: operand is defined by loop-header phi - induction. */
1477 case vect_induction_def:
1479 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1481 /* Get the def from the vectorized stmt. */
1482 def_stmt_info = vinfo_for_stmt (def_stmt);
1483 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1484 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1485 vec_oprnd = PHI_RESULT (vec_stmt);
1486 else
1487 vec_oprnd = gimple_get_lhs (vec_stmt);
1488 return vec_oprnd;
1491 default:
1492 gcc_unreachable ();
1497 /* Function vect_get_vec_def_for_stmt_copy
1499 Return a vector-def for an operand. This function is used when the
1500 vectorized stmt to be created (by the caller to this function) is a "copy"
1501 created in case the vectorized result cannot fit in one vector, and several
1502 copies of the vector-stmt are required. In this case the vector-def is
1503 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1504 of the stmt that defines VEC_OPRND.
1505 DT is the type of the vector def VEC_OPRND.
1507 Context:
1508 In case the vectorization factor (VF) is bigger than the number
1509 of elements that can fit in a vectype (nunits), we have to generate
1510 more than one vector stmt to vectorize the scalar stmt. This situation
1511 arises when there are multiple data-types operated upon in the loop; the
1512 smallest data-type determines the VF, and as a result, when vectorizing
1513 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1514 vector stmt (each computing a vector of 'nunits' results, and together
1515 computing 'VF' results in each iteration). This function is called when
1516 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1517 which VF=16 and nunits=4, so the number of copies required is 4):
1519 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1521 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1522 VS1.1: vx.1 = memref1 VS1.2
1523 VS1.2: vx.2 = memref2 VS1.3
1524 VS1.3: vx.3 = memref3
1526 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1527 VSnew.1: vz1 = vx.1 + ... VSnew.2
1528 VSnew.2: vz2 = vx.2 + ... VSnew.3
1529 VSnew.3: vz3 = vx.3 + ...
1531 The vectorization of S1 is explained in vectorizable_load.
1532 The vectorization of S2:
1533 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1534 the function 'vect_get_vec_def_for_operand' is called to
1535 get the relevant vector-def for each operand of S2. For operand x it
1536 returns the vector-def 'vx.0'.
1538 To create the remaining copies of the vector-stmt (VSnew.j), this
1539 function is called to get the relevant vector-def for each operand. It is
1540 obtained from the respective VS1.j stmt, which is recorded in the
1541 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1543 For example, to obtain the vector-def 'vx.1' in order to create the
1544 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1545 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1546 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1547 and return its def ('vx.1').
1548 Overall, to create the above sequence this function will be called 3 times:
1549 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1550 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1551 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1553 tree
1554 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1556 gimple vec_stmt_for_operand;
1557 stmt_vec_info def_stmt_info;
1559 /* Do nothing; can reuse same def. */
1560 if (dt == vect_external_def || dt == vect_constant_def )
1561 return vec_oprnd;
1563 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1564 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1565 gcc_assert (def_stmt_info);
1566 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1567 gcc_assert (vec_stmt_for_operand);
1568 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1569 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1570 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1571 else
1572 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1573 return vec_oprnd;
1577 /* Get vectorized definitions for the operands to create a copy of an original
1578 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1580 static void
1581 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1582 vec<tree> *vec_oprnds0,
1583 vec<tree> *vec_oprnds1)
1585 tree vec_oprnd = vec_oprnds0->pop ();
1587 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1588 vec_oprnds0->quick_push (vec_oprnd);
1590 if (vec_oprnds1 && vec_oprnds1->length ())
1592 vec_oprnd = vec_oprnds1->pop ();
1593 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1594 vec_oprnds1->quick_push (vec_oprnd);
1599 /* Get vectorized definitions for OP0 and OP1.
1600 REDUC_INDEX is the index of reduction operand in case of reduction,
1601 and -1 otherwise. */
1603 void
1604 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1605 vec<tree> *vec_oprnds0,
1606 vec<tree> *vec_oprnds1,
1607 slp_tree slp_node, int reduc_index)
1609 if (slp_node)
1611 int nops = (op1 == NULL_TREE) ? 1 : 2;
1612 auto_vec<tree> ops (nops);
1613 auto_vec<vec<tree> > vec_defs (nops);
1615 ops.quick_push (op0);
1616 if (op1)
1617 ops.quick_push (op1);
1619 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1621 *vec_oprnds0 = vec_defs[0];
1622 if (op1)
1623 *vec_oprnds1 = vec_defs[1];
1625 else
1627 tree vec_oprnd;
1629 vec_oprnds0->create (1);
1630 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1631 vec_oprnds0->quick_push (vec_oprnd);
1633 if (op1)
1635 vec_oprnds1->create (1);
1636 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1637 vec_oprnds1->quick_push (vec_oprnd);
1643 /* Function vect_finish_stmt_generation.
1645 Insert a new stmt. */
1647 void
1648 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1649 gimple_stmt_iterator *gsi)
1651 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1652 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1653 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1655 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1657 if (!gsi_end_p (*gsi)
1658 && gimple_has_mem_ops (vec_stmt))
1660 gimple at_stmt = gsi_stmt (*gsi);
1661 tree vuse = gimple_vuse (at_stmt);
1662 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1664 tree vdef = gimple_vdef (at_stmt);
1665 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1666 /* If we have an SSA vuse and insert a store, update virtual
1667 SSA form to avoid triggering the renamer. Do so only
1668 if we can easily see all uses - which is what almost always
1669 happens with the way vectorized stmts are inserted. */
1670 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1671 && ((is_gimple_assign (vec_stmt)
1672 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1673 || (is_gimple_call (vec_stmt)
1674 && !(gimple_call_flags (vec_stmt)
1675 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1677 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1678 gimple_set_vdef (vec_stmt, new_vdef);
1679 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1683 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1685 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1686 bb_vinfo));
1688 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1692 dump_printf (MSG_NOTE, "\n");
1695 gimple_set_location (vec_stmt, gimple_location (stmt));
1697 /* While EH edges will generally prevent vectorization, stmt might
1698 e.g. be in a must-not-throw region. Ensure newly created stmts
1699 that could throw are part of the same region. */
1700 int lp_nr = lookup_stmt_eh_lp (stmt);
1701 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1702 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1705 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1706 a function declaration if the target has a vectorized version
1707 of the function, or NULL_TREE if the function cannot be vectorized. */
1709 tree
1710 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1712 tree fndecl = gimple_call_fndecl (call);
1714 /* We only handle functions that do not read or clobber memory -- i.e.
1715 const or novops ones. */
1716 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1717 return NULL_TREE;
1719 if (!fndecl
1720 || TREE_CODE (fndecl) != FUNCTION_DECL
1721 || !DECL_BUILT_IN (fndecl))
1722 return NULL_TREE;
1724 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1725 vectype_in);
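/* For instance, on a target whose builtin_vectorized_function hook provides
   it, a call to a const math builtin (e.g. sqrt when -fno-math-errno makes
   it const) with V2DF vectype_out and vectype_in may be mapped to a vector
   square-root builtin; a target with no such mapping, or a call that reads
   or clobbers memory, simply yields NULL_TREE and the call is not treated
   as vectorizable here.  */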
1729 static tree permute_vec_elements (tree, tree, tree, gimple,
1730 gimple_stmt_iterator *);
1733 /* Function vectorizable_mask_load_store.
1735 Check if STMT performs a conditional load or store that can be vectorized.
1736 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1737 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1738 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1740 static bool
1741 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1742 gimple *vec_stmt, slp_tree slp_node)
1744 tree vec_dest = NULL;
1745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1746 stmt_vec_info prev_stmt_info;
1747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1748 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1749 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1750 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1751 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1752 tree elem_type;
1753 gimple new_stmt;
1754 tree dummy;
1755 tree dataref_ptr = NULL_TREE;
1756 gimple ptr_incr;
1757 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1758 int ncopies;
1759 int i, j;
1760 bool inv_p;
1761 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1762 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1763 int gather_scale = 1;
1764 enum vect_def_type gather_dt = vect_unknown_def_type;
1765 bool is_store;
1766 tree mask;
1767 gimple def_stmt;
1768 tree def;
1769 enum vect_def_type dt;
1771 if (slp_node != NULL)
1772 return false;
1774 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1775 gcc_assert (ncopies >= 1);
1777 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1778 mask = gimple_call_arg (stmt, 2);
1779 if (TYPE_PRECISION (TREE_TYPE (mask))
1780 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1781 return false;
1783 /* FORNOW. This restriction should be relaxed. */
1784 if (nested_in_vect_loop && ncopies > 1)
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "multiple types in nested loop.");
1789 return false;
1792 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1793 return false;
1795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1796 return false;
1798 if (!STMT_VINFO_DATA_REF (stmt_info))
1799 return false;
1801 elem_type = TREE_TYPE (vectype);
1803 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1804 return false;
1806 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1807 return false;
1809 if (STMT_VINFO_GATHER_P (stmt_info))
1811 gimple def_stmt;
1812 tree def;
1813 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1814 &gather_off, &gather_scale);
1815 gcc_assert (gather_decl);
1816 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1817 &def_stmt, &def, &gather_dt,
1818 &gather_off_vectype))
1820 if (dump_enabled_p ())
1821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1822 "gather index use not simple.");
1823 return false;
1826 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1827 tree masktype
1828 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1829 if (TREE_CODE (masktype) == INTEGER_TYPE)
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1833 "masked gather with integer mask not supported.");
1834 return false;
1837 else if (tree_int_cst_compare (nested_in_vect_loop
1838 ? STMT_VINFO_DR_STEP (stmt_info)
1839 : DR_STEP (dr), size_zero_node) <= 0)
1840 return false;
1841 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1842 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1843 return false;
1845 if (TREE_CODE (mask) != SSA_NAME)
1846 return false;
1848 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1849 &def_stmt, &def, &dt))
1850 return false;
1852 if (is_store)
1854 tree rhs = gimple_call_arg (stmt, 3);
1855 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1856 &def_stmt, &def, &dt))
1857 return false;
1860 if (!vec_stmt) /* transformation not required. */
1862 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1863 if (is_store)
1864 vect_model_store_cost (stmt_info, ncopies, false, dt,
1865 NULL, NULL, NULL);
1866 else
1867 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1868 return true;
1871 /** Transform. **/
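/* Three cases are handled below: a masked gather load, a masked store,
and a masked load. Each emits one gather builtin or masked internal-fn
call per vector copy. */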
1873 if (STMT_VINFO_GATHER_P (stmt_info))
1875 tree vec_oprnd0 = NULL_TREE, op;
1876 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1877 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1878 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1879 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1880 tree mask_perm_mask = NULL_TREE;
1881 edge pe = loop_preheader_edge (loop);
1882 gimple_seq seq;
1883 basic_block new_bb;
1884 enum { NARROW, NONE, WIDEN } modifier;
1885 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1887 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1888 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1889 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1890 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1891 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1892 scaletype = TREE_VALUE (arglist);
1893 gcc_checking_assert (types_compatible_p (srctype, rettype)
1894 && types_compatible_p (srctype, masktype));
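/* Relate the number of data elements to the number of gather offsets:
equal counts need no adjustment (NONE); if the offset vector has twice
as many elements (WIDEN), consecutive copies consume the low and high
halves of one offset vector; if it has half as many (NARROW), two gather
calls feed one data vector and NCOPIES is doubled. */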
1896 if (nunits == gather_off_nunits)
1897 modifier = NONE;
1898 else if (nunits == gather_off_nunits / 2)
1900 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1901 modifier = WIDEN;
1903 for (i = 0; i < gather_off_nunits; ++i)
1904 sel[i] = i | nunits;
1906 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1907 gcc_assert (perm_mask != NULL_TREE);
1909 else if (nunits == gather_off_nunits * 2)
1911 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1912 modifier = NARROW;
1914 for (i = 0; i < nunits; ++i)
1915 sel[i] = i < gather_off_nunits
1916 ? i : i + nunits - gather_off_nunits;
1918 perm_mask = vect_gen_perm_mask (vectype, sel);
1919 gcc_assert (perm_mask != NULL_TREE);
1920 ncopies *= 2;
1921 for (i = 0; i < nunits; ++i)
1922 sel[i] = i | gather_off_nunits;
1923 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1924 gcc_assert (mask_perm_mask != NULL_TREE);
1926 else
1927 gcc_unreachable ();
1929 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1931 ptr = fold_convert (ptrtype, gather_base);
1932 if (!is_gimple_min_invariant (ptr))
1934 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1935 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1936 gcc_assert (!new_bb);
1939 scale = build_int_cst (scaletype, gather_scale);
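/* Emit one gather per copy: obtain (or permute) the offset and mask
vectors, VIEW_CONVERT them to the types the gather builtin expects, issue
the call, and convert the result back to VECTYPE. For NARROW, pairs of
consecutive results are combined with a permute. */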
1941 prev_stmt_info = NULL;
1942 for (j = 0; j < ncopies; ++j)
1944 if (modifier == WIDEN && (j & 1))
1945 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1946 perm_mask, stmt, gsi);
1947 else if (j == 0)
1948 op = vec_oprnd0
1949 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1950 else
1951 op = vec_oprnd0
1952 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1954 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1956 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1957 == TYPE_VECTOR_SUBPARTS (idxtype));
1958 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1959 var = make_ssa_name (var, NULL);
1960 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1961 new_stmt
1962 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1963 op, NULL_TREE);
1964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1965 op = var;
1968 if (mask_perm_mask && (j & 1))
1969 mask_op = permute_vec_elements (mask_op, mask_op,
1970 mask_perm_mask, stmt, gsi);
1971 else
1973 if (j == 0)
1974 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1975 else
1977 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1978 &def_stmt, &def, &dt);
1979 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1982 mask_op = vec_mask;
1983 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1985 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1986 == TYPE_VECTOR_SUBPARTS (masktype));
1987 var = vect_get_new_vect_var (masktype, vect_simple_var,
1988 NULL);
1989 var = make_ssa_name (var, NULL);
1990 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1991 new_stmt
1992 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1993 mask_op, NULL_TREE);
1994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1995 mask_op = var;
1999 new_stmt
2000 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2001 scale);
2003 if (!useless_type_conversion_p (vectype, rettype))
2005 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2006 == TYPE_VECTOR_SUBPARTS (rettype));
2007 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2008 op = make_ssa_name (var, new_stmt);
2009 gimple_call_set_lhs (new_stmt, op);
2010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2011 var = make_ssa_name (vec_dest, NULL);
2012 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2013 new_stmt
2014 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2015 NULL_TREE);
2017 else
2019 var = make_ssa_name (vec_dest, new_stmt);
2020 gimple_call_set_lhs (new_stmt, var);
2023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2025 if (modifier == NARROW)
2027 if ((j & 1) == 0)
2029 prev_res = var;
2030 continue;
2032 var = permute_vec_elements (prev_res, var,
2033 perm_mask, stmt, gsi);
2034 new_stmt = SSA_NAME_DEF_STMT (var);
2037 if (prev_stmt_info == NULL)
2038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2039 else
2040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2041 prev_stmt_info = vinfo_for_stmt (new_stmt);
2044 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2045 from the IL. */
2046 tree lhs = gimple_call_lhs (stmt);
2047 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2048 set_vinfo_for_stmt (new_stmt, stmt_info);
2049 set_vinfo_for_stmt (stmt, NULL);
2050 STMT_VINFO_STMT (stmt_info) = new_stmt;
2051 gsi_replace (gsi, new_stmt, true);
2052 return true;
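/* Masked store: create the data-ref pointer on the first copy and bump it
by the vector size on later copies, record the pointer alignment, and emit
one IFN_MASK_STORE per copy. */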
2054 else if (is_store)
2056 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2057 prev_stmt_info = NULL;
2058 for (i = 0; i < ncopies; i++)
2060 unsigned align, misalign;
2062 if (i == 0)
2064 tree rhs = gimple_call_arg (stmt, 3);
2065 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2066 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2067 /* We should have caught mismatched types earlier. */
2068 gcc_assert (useless_type_conversion_p (vectype,
2069 TREE_TYPE (vec_rhs)));
2070 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2071 NULL_TREE, &dummy, gsi,
2072 &ptr_incr, false, &inv_p);
2073 gcc_assert (!inv_p);
2075 else
2077 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2078 &def, &dt);
2079 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2080 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2081 &def, &dt);
2082 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2083 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2084 TYPE_SIZE_UNIT (vectype));
2087 align = TYPE_ALIGN_UNIT (vectype);
2088 if (aligned_access_p (dr))
2089 misalign = 0;
2090 else if (DR_MISALIGNMENT (dr) == -1)
2092 align = TYPE_ALIGN_UNIT (elem_type);
2093 misalign = 0;
2095 else
2096 misalign = DR_MISALIGNMENT (dr);
2097 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2098 misalign);
2099 new_stmt
2100 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2101 gimple_call_arg (stmt, 1),
2102 vec_mask, vec_rhs);
2103 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2104 if (i == 0)
2105 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2106 else
2107 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2108 prev_stmt_info = vinfo_for_stmt (new_stmt);
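/* Masked load: same data-ref pointer and alignment handling as the store
case above, but emit one IFN_MASK_LOAD per copy with a fresh vector lhs. */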
2111 else
2113 tree vec_mask = NULL_TREE;
2114 prev_stmt_info = NULL;
2115 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2116 for (i = 0; i < ncopies; i++)
2118 unsigned align, misalign;
2120 if (i == 0)
2122 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2123 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2124 NULL_TREE, &dummy, gsi,
2125 &ptr_incr, false, &inv_p);
2126 gcc_assert (!inv_p);
2128 else
2130 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2131 &def, &dt);
2132 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2133 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2134 TYPE_SIZE_UNIT (vectype));
2137 align = TYPE_ALIGN_UNIT (vectype);
2138 if (aligned_access_p (dr))
2139 misalign = 0;
2140 else if (DR_MISALIGNMENT (dr) == -1)
2142 align = TYPE_ALIGN_UNIT (elem_type);
2143 misalign = 0;
2145 else
2146 misalign = DR_MISALIGNMENT (dr);
2147 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2148 misalign);
2149 new_stmt
2150 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2151 gimple_call_arg (stmt, 1),
2152 vec_mask);
2153 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2155 if (i == 0)
2156 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2157 else
2158 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2159 prev_stmt_info = vinfo_for_stmt (new_stmt);
2163 if (!is_store)
2165 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2166 from the IL. */
2167 tree lhs = gimple_call_lhs (stmt);
2168 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2169 set_vinfo_for_stmt (new_stmt, stmt_info);
2170 set_vinfo_for_stmt (stmt, NULL);
2171 STMT_VINFO_STMT (stmt_info) = new_stmt;
2172 gsi_replace (gsi, new_stmt, true);
2175 return true;
2179 /* Function vectorizable_call.
2181 Check if STMT performs a function call that can be vectorized.
2182 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2183 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2184 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2186 static bool
2187 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2188 slp_tree slp_node)
2190 tree vec_dest;
2191 tree scalar_dest;
2192 tree op, type;
2193 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2194 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2195 tree vectype_out, vectype_in;
2196 int nunits_in;
2197 int nunits_out;
2198 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2199 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2200 tree fndecl, new_temp, def, rhs_type;
2201 gimple def_stmt;
2202 enum vect_def_type dt[3]
2203 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2204 gimple new_stmt = NULL;
2205 int ncopies, j;
2206 vec<tree> vargs = vNULL;
2207 enum { NARROW, NONE, WIDEN } modifier;
2208 size_t i, nargs;
2209 tree lhs;
2211 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2212 return false;
2214 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2215 return false;
2217 /* Is STMT a vectorizable call? */
2218 if (!is_gimple_call (stmt))
2219 return false;
2221 if (gimple_call_internal_p (stmt)
2222 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2223 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2224 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2225 slp_node);
2227 if (gimple_call_lhs (stmt) == NULL_TREE
2228 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2229 return false;
2231 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2233 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2235 /* Process function arguments. */
2236 rhs_type = NULL_TREE;
2237 vectype_in = NULL_TREE;
2238 nargs = gimple_call_num_args (stmt);
2240 /* Bail out if the function has more than three arguments; we do not have
2241 interesting builtin functions to vectorize with more than two arguments
2242 except for fma. A call with no arguments is not vectorizable either. */
2243 if (nargs == 0 || nargs > 3)
2244 return false;
2246 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2247 if (gimple_call_internal_p (stmt)
2248 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2250 nargs = 0;
2251 rhs_type = unsigned_type_node;
2254 for (i = 0; i < nargs; i++)
2256 tree opvectype;
2258 op = gimple_call_arg (stmt, i);
2260 /* We can only handle calls with arguments of the same type. */
2261 if (rhs_type
2262 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2266 "argument types differ.\n");
2267 return false;
2269 if (!rhs_type)
2270 rhs_type = TREE_TYPE (op);
2272 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2273 &def_stmt, &def, &dt[i], &opvectype))
2275 if (dump_enabled_p ())
2276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2277 "use not simple.\n");
2278 return false;
2281 if (!vectype_in)
2282 vectype_in = opvectype;
2283 else if (opvectype
2284 && opvectype != vectype_in)
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2288 "argument vector types differ.\n");
2289 return false;
2292 /* If all arguments are external or constant defs, use a vector type with
2293 the same size as the output vector type. */
2294 if (!vectype_in)
2295 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2296 if (vec_stmt)
2297 gcc_assert (vectype_in);
2298 if (!vectype_in)
2300 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "no vectype for scalar type ");
2304 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2305 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2308 return false;
2311 /* FORNOW: only handle equal element counts or a factor-of-two difference between the input and output vector types. */
2312 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2313 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2314 if (nunits_in == nunits_out / 2)
2315 modifier = NARROW;
2316 else if (nunits_out == nunits_in)
2317 modifier = NONE;
2318 else if (nunits_out == nunits_in / 2)
2319 modifier = WIDEN;
2320 else
2321 return false;
2323 /* For now, we only vectorize functions if a target specific builtin
2324 is available. TODO -- in some cases, it might be profitable to
2325 insert the calls for pieces of the vector, in order to be able
2326 to vectorize other operations in the loop. */
2327 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2328 if (fndecl == NULL_TREE)
2330 if (gimple_call_internal_p (stmt)
2331 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2332 && !slp_node
2333 && loop_vinfo
2334 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2335 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2336 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2337 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2339 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2340 { 0, 1, 2, ... vf - 1 } vector. */
2341 gcc_assert (nargs == 0);
2343 else
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "function is not vectorizable.\n");
2348 return false;
2352 gcc_assert (!gimple_vuse (stmt));
2354 if (slp_node || PURE_SLP_STMT (stmt_info))
2355 ncopies = 1;
2356 else if (modifier == NARROW)
2357 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2358 else
2359 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2361 /* Sanity check: make sure that at least one copy of the vectorized stmt
2362 needs to be generated. */
2363 gcc_assert (ncopies >= 1);
2365 if (!vec_stmt) /* transformation not required. */
2367 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2368 if (dump_enabled_p ())
2369 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2370 "\n");
2371 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2372 return true;
2375 /** Transform. **/
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2380 /* Handle def. */
2381 scalar_dest = gimple_call_lhs (stmt);
2382 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2384 prev_stmt_info = NULL;
2385 switch (modifier)
2387 case NONE:
2388 for (j = 0; j < ncopies; ++j)
2390 /* Build argument list for the vectorized call. */
2391 if (j == 0)
2392 vargs.create (nargs);
2393 else
2394 vargs.truncate (0);
2396 if (slp_node)
2398 auto_vec<vec<tree> > vec_defs (nargs);
2399 vec<tree> vec_oprnds0;
2401 for (i = 0; i < nargs; i++)
2402 vargs.quick_push (gimple_call_arg (stmt, i));
2403 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2404 vec_oprnds0 = vec_defs[0];
2406 /* Arguments are ready. Create the new vector stmt. */
2407 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2409 size_t k;
2410 for (k = 0; k < nargs; k++)
2412 vec<tree> vec_oprndsk = vec_defs[k];
2413 vargs[k] = vec_oprndsk[i];
2415 new_stmt = gimple_build_call_vec (fndecl, vargs);
2416 new_temp = make_ssa_name (vec_dest, new_stmt);
2417 gimple_call_set_lhs (new_stmt, new_temp);
2418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2419 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2422 for (i = 0; i < nargs; i++)
2424 vec<tree> vec_oprndsi = vec_defs[i];
2425 vec_oprndsi.release ();
2427 continue;
2430 for (i = 0; i < nargs; i++)
2432 op = gimple_call_arg (stmt, i);
2433 if (j == 0)
2434 vec_oprnd0
2435 = vect_get_vec_def_for_operand (op, stmt, NULL);
2436 else
2438 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2439 vec_oprnd0
2440 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2443 vargs.quick_push (vec_oprnd0);
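/* IFN_GOMP_SIMD_LANE is not turned into a call; copy number J simply
becomes the constant vector { J*NUNITS_OUT, J*NUNITS_OUT + 1, ... }. */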
2446 if (gimple_call_internal_p (stmt)
2447 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2449 tree *v = XALLOCAVEC (tree, nunits_out);
2450 int k;
2451 for (k = 0; k < nunits_out; ++k)
2452 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2453 tree cst = build_vector (vectype_out, v);
2454 tree new_var
2455 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2456 gimple init_stmt = gimple_build_assign (new_var, cst);
2457 new_temp = make_ssa_name (new_var, init_stmt);
2458 gimple_assign_set_lhs (init_stmt, new_temp);
2459 vect_init_vector_1 (stmt, init_stmt, NULL);
2460 new_temp = make_ssa_name (vec_dest, NULL);
2461 new_stmt = gimple_build_assign (new_temp,
2462 gimple_assign_lhs (init_stmt));
2464 else
2466 new_stmt = gimple_build_call_vec (fndecl, vargs);
2467 new_temp = make_ssa_name (vec_dest, new_stmt);
2468 gimple_call_set_lhs (new_stmt, new_temp);
2470 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2472 if (j == 0)
2473 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2474 else
2475 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2477 prev_stmt_info = vinfo_for_stmt (new_stmt);
2480 break;
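/* NARROW: every output vector is computed from two input vectors, so each
scalar argument contributes a pair of vector operands per call. */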
2482 case NARROW:
2483 for (j = 0; j < ncopies; ++j)
2485 /* Build argument list for the vectorized call. */
2486 if (j == 0)
2487 vargs.create (nargs * 2);
2488 else
2489 vargs.truncate (0);
2491 if (slp_node)
2493 auto_vec<vec<tree> > vec_defs (nargs);
2494 vec<tree> vec_oprnds0;
2496 for (i = 0; i < nargs; i++)
2497 vargs.quick_push (gimple_call_arg (stmt, i));
2498 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2499 vec_oprnds0 = vec_defs[0];
2501 /* Arguments are ready. Create the new vector stmt. */
2502 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2504 size_t k;
2505 vargs.truncate (0);
2506 for (k = 0; k < nargs; k++)
2508 vec<tree> vec_oprndsk = vec_defs[k];
2509 vargs.quick_push (vec_oprndsk[i]);
2510 vargs.quick_push (vec_oprndsk[i + 1]);
2512 new_stmt = gimple_build_call_vec (fndecl, vargs);
2513 new_temp = make_ssa_name (vec_dest, new_stmt);
2514 gimple_call_set_lhs (new_stmt, new_temp);
2515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2516 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2519 for (i = 0; i < nargs; i++)
2521 vec<tree> vec_oprndsi = vec_defs[i];
2522 vec_oprndsi.release ();
2524 continue;
2527 for (i = 0; i < nargs; i++)
2529 op = gimple_call_arg (stmt, i);
2530 if (j == 0)
2532 vec_oprnd0
2533 = vect_get_vec_def_for_operand (op, stmt, NULL);
2534 vec_oprnd1
2535 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2537 else
2539 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2540 vec_oprnd0
2541 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2542 vec_oprnd1
2543 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2546 vargs.quick_push (vec_oprnd0);
2547 vargs.quick_push (vec_oprnd1);
2550 new_stmt = gimple_build_call_vec (fndecl, vargs);
2551 new_temp = make_ssa_name (vec_dest, new_stmt);
2552 gimple_call_set_lhs (new_stmt, new_temp);
2553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2555 if (j == 0)
2556 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2557 else
2558 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2560 prev_stmt_info = vinfo_for_stmt (new_stmt);
2563 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2565 break;
2567 case WIDEN:
2568 /* No current target implements this case. */
2569 return false;
2572 vargs.release ();
2574 /* The call in STMT might prevent it from being removed in dce.
2575 We however cannot remove it here, due to the way the ssa name
2576 it defines is mapped to the new definition. So just replace the
2577 rhs of the statement with something harmless. */
2579 if (slp_node)
2580 return true;
2582 type = TREE_TYPE (scalar_dest);
2583 if (is_pattern_stmt_p (stmt_info))
2584 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2585 else
2586 lhs = gimple_call_lhs (stmt);
2587 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2588 set_vinfo_for_stmt (new_stmt, stmt_info);
2589 set_vinfo_for_stmt (stmt, NULL);
2590 STMT_VINFO_STMT (stmt_info) = new_stmt;
2591 gsi_replace (gsi, new_stmt, false);
2593 return true;
2597 struct simd_call_arg_info
2599 tree vectype;
2600 tree op;
2601 enum vect_def_type dt;
2602 HOST_WIDE_INT linear_step;
2603 unsigned int align;
2606 /* Function vectorizable_simd_clone_call.
2608 Check if STMT performs a function call that can be vectorized
2609 by calling a simd clone of the function.
2610 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2611 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2612 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2614 static bool
2615 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2616 gimple *vec_stmt, slp_tree slp_node)
2618 tree vec_dest;
2619 tree scalar_dest;
2620 tree op, type;
2621 tree vec_oprnd0 = NULL_TREE;
2622 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2623 tree vectype;
2624 unsigned int nunits;
2625 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2626 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2627 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2628 tree fndecl, new_temp, def;
2629 gimple def_stmt;
2630 gimple new_stmt = NULL;
2631 int ncopies, j;
2632 vec<simd_call_arg_info> arginfo = vNULL;
2633 vec<tree> vargs = vNULL;
2634 size_t i, nargs;
2635 tree lhs, rtype, ratype;
2636 vec<constructor_elt, va_gc> *ret_ctor_elts;
2638 /* Is STMT a vectorizable call? */
2639 if (!is_gimple_call (stmt))
2640 return false;
2642 fndecl = gimple_call_fndecl (stmt);
2643 if (fndecl == NULL_TREE)
2644 return false;
2646 struct cgraph_node *node = cgraph_get_node (fndecl);
2647 if (node == NULL || node->simd_clones == NULL)
2648 return false;
2650 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2651 return false;
2653 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2654 return false;
2656 if (gimple_call_lhs (stmt)
2657 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2658 return false;
2660 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2662 vectype = STMT_VINFO_VECTYPE (stmt_info);
2664 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2665 return false;
2667 /* FORNOW: SLP is not supported. */
2668 if (slp_node || PURE_SLP_STMT (stmt_info))
2669 return false;
2671 /* Process function arguments. */
2672 nargs = gimple_call_num_args (stmt);
2674 /* Bail out if the function has zero arguments. */
2675 if (nargs == 0)
2676 return false;
2678 arginfo.create (nargs);
2680 for (i = 0; i < nargs; i++)
2682 simd_call_arg_info thisarginfo;
2683 affine_iv iv;
2685 thisarginfo.linear_step = 0;
2686 thisarginfo.align = 0;
2687 thisarginfo.op = NULL_TREE;
2689 op = gimple_call_arg (stmt, i);
2690 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2691 &def_stmt, &def, &thisarginfo.dt,
2692 &thisarginfo.vectype)
2693 || thisarginfo.dt == vect_uninitialized_def)
2695 if (dump_enabled_p ())
2696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2697 "use not simple.\n");
2698 arginfo.release ();
2699 return false;
2702 if (thisarginfo.dt == vect_constant_def
2703 || thisarginfo.dt == vect_external_def)
2704 gcc_assert (thisarginfo.vectype == NULL_TREE);
2705 else
2706 gcc_assert (thisarginfo.vectype != NULL_TREE);
2708 if (thisarginfo.dt != vect_constant_def
2709 && thisarginfo.dt != vect_external_def
2710 && loop_vinfo
2711 && TREE_CODE (op) == SSA_NAME
2712 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2713 && tree_fits_shwi_p (iv.step))
2715 thisarginfo.linear_step = tree_to_shwi (iv.step);
2716 thisarginfo.op = iv.base;
2718 else if ((thisarginfo.dt == vect_constant_def
2719 || thisarginfo.dt == vect_external_def)
2720 && POINTER_TYPE_P (TREE_TYPE (op)))
2721 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2723 arginfo.quick_push (thisarginfo);
2726 unsigned int badness = 0;
2727 struct cgraph_node *bestn = NULL;
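/* Select the simd clone to call: reuse the decl recorded during analysis
if there is one; otherwise scan all clones of the callee, skip those whose
simdlen exceeds the vectorization factor, whose arguments cannot be
matched or which require more alignment than is known, and score the rest
(penalizing simdlen smaller than the VF, inbranch clones and target
preferences). The clone with the lowest badness is chosen. */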
2728 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2729 bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2730 else
2731 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2732 n = n->simdclone->next_clone)
2734 unsigned int this_badness = 0;
2735 if (n->simdclone->simdlen
2736 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2737 || n->simdclone->nargs != nargs)
2738 continue;
2739 if (n->simdclone->simdlen
2740 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2741 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2742 - exact_log2 (n->simdclone->simdlen)) * 1024;
2743 if (n->simdclone->inbranch)
2744 this_badness += 2048;
2745 int target_badness = targetm.simd_clone.usable (n);
2746 if (target_badness < 0)
2747 continue;
2748 this_badness += target_badness * 512;
2749 /* FORNOW: Have to add code to add the mask argument. */
2750 if (n->simdclone->inbranch)
2751 continue;
2752 for (i = 0; i < nargs; i++)
2754 switch (n->simdclone->args[i].arg_type)
2756 case SIMD_CLONE_ARG_TYPE_VECTOR:
2757 if (!useless_type_conversion_p
2758 (n->simdclone->args[i].orig_type,
2759 TREE_TYPE (gimple_call_arg (stmt, i))))
2760 i = -1;
2761 else if (arginfo[i].dt == vect_constant_def
2762 || arginfo[i].dt == vect_external_def
2763 || arginfo[i].linear_step)
2764 this_badness += 64;
2765 break;
2766 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2767 if (arginfo[i].dt != vect_constant_def
2768 && arginfo[i].dt != vect_external_def)
2769 i = -1;
2770 break;
2771 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2772 if (arginfo[i].dt == vect_constant_def
2773 || arginfo[i].dt == vect_external_def
2774 || (arginfo[i].linear_step
2775 != n->simdclone->args[i].linear_step))
2776 i = -1;
2777 break;
2778 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2779 /* FORNOW: variable-step linear arguments are not supported. */
2780 i = -1;
2781 break;
2782 case SIMD_CLONE_ARG_TYPE_MASK:
2783 gcc_unreachable ();
2785 if (i == (size_t) -1)
2786 break;
2787 if (n->simdclone->args[i].alignment > arginfo[i].align)
2789 i = -1;
2790 break;
2792 if (arginfo[i].align)
2793 this_badness += (exact_log2 (arginfo[i].align)
2794 - exact_log2 (n->simdclone->args[i].alignment));
2796 if (i == (size_t) -1)
2797 continue;
2798 if (bestn == NULL || this_badness < badness)
2800 bestn = n;
2801 badness = this_badness;
2805 if (bestn == NULL)
2807 arginfo.release ();
2808 return false;
2811 for (i = 0; i < nargs; i++)
2812 if ((arginfo[i].dt == vect_constant_def
2813 || arginfo[i].dt == vect_external_def)
2814 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2816 arginfo[i].vectype
2817 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2818 i)));
2819 if (arginfo[i].vectype == NULL
2820 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2821 > bestn->simdclone->simdlen))
2823 arginfo.release ();
2824 return false;
2828 fndecl = bestn->decl;
2829 nunits = bestn->simdclone->simdlen;
2830 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2832 /* If the function isn't const, only allow it in simd loops where the user
2833 has asserted that at least nunits consecutive iterations can be
2834 performed using SIMD instructions. */
2835 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2836 && gimple_vuse (stmt))
2838 arginfo.release ();
2839 return false;
2842 /* Sanity check: make sure that at least one copy of the vectorized stmt
2843 needs to be generated. */
2844 gcc_assert (ncopies >= 1);
2846 if (!vec_stmt) /* transformation not required. */
2848 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2849 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_NOTE, vect_location,
2852 "=== vectorizable_simd_clone_call ===\n");
2853 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2854 arginfo.release ();
2855 return true;
2858 /** Transform. **/
2860 if (dump_enabled_p ())
2861 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2863 /* Handle def. */
2864 scalar_dest = gimple_call_lhs (stmt);
2865 vec_dest = NULL_TREE;
2866 rtype = NULL_TREE;
2867 ratype = NULL_TREE;
2868 if (scalar_dest)
2870 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2871 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2872 if (TREE_CODE (rtype) == ARRAY_TYPE)
2874 ratype = rtype;
2875 rtype = TREE_TYPE (ratype);
2879 prev_stmt_info = NULL;
2880 for (j = 0; j < ncopies; ++j)
2882 /* Build argument list for the vectorized call. */
2883 if (j == 0)
2884 vargs.create (nargs);
2885 else
2886 vargs.truncate (0);
2888 for (i = 0; i < nargs; i++)
2890 unsigned int k, l, m, o;
2891 tree atype;
2892 op = gimple_call_arg (stmt, i);
2893 switch (bestn->simdclone->args[i].arg_type)
2895 case SIMD_CLONE_ARG_TYPE_VECTOR:
2896 atype = bestn->simdclone->args[i].vector_type;
2897 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
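/* The clone may take this argument in a narrower or wider vector type
than ARGINFO[I].VECTYPE: a wider incoming vector is split into pieces
with BIT_FIELD_REFs, while several narrower defs are combined into a
CONSTRUCTOR (or passed through directly when the types agree). */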
2898 for (m = j * o; m < (j + 1) * o; m++)
2900 if (TYPE_VECTOR_SUBPARTS (atype)
2901 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2903 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2904 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2905 / TYPE_VECTOR_SUBPARTS (atype));
2906 gcc_assert ((k & (k - 1)) == 0);
2907 if (m == 0)
2908 vec_oprnd0
2909 = vect_get_vec_def_for_operand (op, stmt, NULL);
2910 else
2912 vec_oprnd0 = arginfo[i].op;
2913 if ((m & (k - 1)) == 0)
2914 vec_oprnd0
2915 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2916 vec_oprnd0);
2918 arginfo[i].op = vec_oprnd0;
2919 vec_oprnd0
2920 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2921 size_int (prec),
2922 bitsize_int ((m & (k - 1)) * prec));
2923 new_stmt
2924 = gimple_build_assign (make_ssa_name (atype, NULL),
2925 vec_oprnd0);
2926 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2927 vargs.safe_push (gimple_assign_lhs (new_stmt));
2929 else
2931 k = (TYPE_VECTOR_SUBPARTS (atype)
2932 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2933 gcc_assert ((k & (k - 1)) == 0);
2934 vec<constructor_elt, va_gc> *ctor_elts;
2935 if (k != 1)
2936 vec_alloc (ctor_elts, k);
2937 else
2938 ctor_elts = NULL;
2939 for (l = 0; l < k; l++)
2941 if (m == 0 && l == 0)
2942 vec_oprnd0
2943 = vect_get_vec_def_for_operand (op, stmt, NULL);
2944 else
2945 vec_oprnd0
2946 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2947 arginfo[i].op);
2948 arginfo[i].op = vec_oprnd0;
2949 if (k == 1)
2950 break;
2951 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2952 vec_oprnd0);
2954 if (k == 1)
2955 vargs.safe_push (vec_oprnd0);
2956 else
2958 vec_oprnd0 = build_constructor (atype, ctor_elts);
2959 new_stmt
2960 = gimple_build_assign (make_ssa_name (atype, NULL),
2961 vec_oprnd0);
2962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2963 vargs.safe_push (gimple_assign_lhs (new_stmt));
2967 break;
2968 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2969 vargs.safe_push (op);
2970 break;
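/* A constant-step linear argument becomes an induction: on the first copy
a PHI is created in the loop header, starting at the argument's initial
value and advancing by LINEAR_STEP * NCOPIES * NUNITS per iteration;
later copies just add LINEAR_STEP * J * NUNITS to the PHI result. */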
2971 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2972 if (j == 0)
2974 gimple_seq stmts;
2975 arginfo[i].op
2976 = force_gimple_operand (arginfo[i].op, &stmts, true,
2977 NULL_TREE);
2978 if (stmts != NULL)
2980 basic_block new_bb;
2981 edge pe = loop_preheader_edge (loop);
2982 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2983 gcc_assert (!new_bb);
2985 tree phi_res = copy_ssa_name (op, NULL);
2986 gimple new_phi = create_phi_node (phi_res, loop->header);
2987 set_vinfo_for_stmt (new_phi,
2988 new_stmt_vec_info (new_phi, loop_vinfo,
2989 NULL));
2990 add_phi_arg (new_phi, arginfo[i].op,
2991 loop_preheader_edge (loop), UNKNOWN_LOCATION);
2992 enum tree_code code
2993 = POINTER_TYPE_P (TREE_TYPE (op))
2994 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2995 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2996 ? sizetype : TREE_TYPE (op);
2997 double_int cst
2998 = double_int::from_shwi
2999 (bestn->simdclone->args[i].linear_step);
3000 cst *= double_int::from_uhwi (ncopies * nunits);
3001 tree tcst = double_int_to_tree (type, cst);
3002 tree phi_arg = copy_ssa_name (op, NULL);
3003 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3004 phi_res, tcst);
3005 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3006 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3007 set_vinfo_for_stmt (new_stmt,
3008 new_stmt_vec_info (new_stmt, loop_vinfo,
3009 NULL));
3010 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3011 UNKNOWN_LOCATION);
3012 arginfo[i].op = phi_res;
3013 vargs.safe_push (phi_res);
3015 else
3017 enum tree_code code
3018 = POINTER_TYPE_P (TREE_TYPE (op))
3019 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3020 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3021 ? sizetype : TREE_TYPE (op);
3022 double_int cst
3023 = double_int::from_shwi
3024 (bestn->simdclone->args[i].linear_step);
3025 cst *= double_int::from_uhwi (j * nunits);
3026 tree tcst = double_int_to_tree (type, cst);
3027 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3028 new_stmt
3029 = gimple_build_assign_with_ops (code, new_temp,
3030 arginfo[i].op, tcst);
3031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3032 vargs.safe_push (new_temp);
3034 break;
3035 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3036 default:
3037 gcc_unreachable ();
3041 new_stmt = gimple_build_call_vec (fndecl, vargs);
3042 if (vec_dest)
3044 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3045 if (ratype)
3046 new_temp = create_tmp_var (ratype, NULL);
3047 else if (TYPE_VECTOR_SUBPARTS (vectype)
3048 == TYPE_VECTOR_SUBPARTS (rtype))
3049 new_temp = make_ssa_name (vec_dest, new_stmt);
3050 else
3051 new_temp = make_ssa_name (rtype, new_stmt);
3052 gimple_call_set_lhs (new_stmt, new_temp);
3054 vect_finish_stmt_generation (stmt, new_stmt, gsi);
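/* The clone's return value need not match VECTYPE directly: a wider
return vector (or an array return, RATYPE) is split into VECTYPE-sized
pieces, several narrower returns are collected into a CONSTRUCTOR, and
array temporaries are clobbered once their contents have been copied. */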
3056 if (vec_dest)
3058 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3060 unsigned int k, l;
3061 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3062 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3063 gcc_assert ((k & (k - 1)) == 0);
3064 for (l = 0; l < k; l++)
3066 tree t;
3067 if (ratype)
3069 t = build_fold_addr_expr (new_temp);
3070 t = build2 (MEM_REF, vectype, t,
3071 build_int_cst (TREE_TYPE (t),
3072 l * prec / BITS_PER_UNIT));
3074 else
3075 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3076 size_int (prec), bitsize_int (l * prec));
3077 new_stmt
3078 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3080 if (j == 0 && l == 0)
3081 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3082 else
3083 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3085 prev_stmt_info = vinfo_for_stmt (new_stmt);
3088 if (ratype)
3090 tree clobber = build_constructor (ratype, NULL);
3091 TREE_THIS_VOLATILE (clobber) = 1;
3092 new_stmt = gimple_build_assign (new_temp, clobber);
3093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3095 continue;
3097 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3099 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3100 / TYPE_VECTOR_SUBPARTS (rtype));
3101 gcc_assert ((k & (k - 1)) == 0);
3102 if ((j & (k - 1)) == 0)
3103 vec_alloc (ret_ctor_elts, k);
3104 if (ratype)
3106 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3107 for (m = 0; m < o; m++)
3109 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3110 size_int (m), NULL_TREE, NULL_TREE);
3111 new_stmt
3112 = gimple_build_assign (make_ssa_name (rtype, NULL),
3113 tem);
3114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3115 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3116 gimple_assign_lhs (new_stmt));
3118 tree clobber = build_constructor (ratype, NULL);
3119 TREE_THIS_VOLATILE (clobber) = 1;
3120 new_stmt = gimple_build_assign (new_temp, clobber);
3121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3123 else
3124 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3125 if ((j & (k - 1)) != k - 1)
3126 continue;
3127 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3128 new_stmt
3129 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3130 vec_oprnd0);
3131 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3133 if ((unsigned) j == k - 1)
3134 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3135 else
3136 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3138 prev_stmt_info = vinfo_for_stmt (new_stmt);
3139 continue;
3141 else if (ratype)
3143 tree t = build_fold_addr_expr (new_temp);
3144 t = build2 (MEM_REF, vectype, t,
3145 build_int_cst (TREE_TYPE (t), 0));
3146 new_stmt
3147 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3148 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3149 tree clobber = build_constructor (ratype, NULL);
3150 TREE_THIS_VOLATILE (clobber) = 1;
3151 vect_finish_stmt_generation (stmt,
3152 gimple_build_assign (new_temp,
3153 clobber), gsi);
3157 if (j == 0)
3158 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3159 else
3160 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3162 prev_stmt_info = vinfo_for_stmt (new_stmt);
3165 vargs.release ();
3167 /* The call in STMT might prevent it from being removed in dce.
3168 We however cannot remove it here, due to the way the ssa name
3169 it defines is mapped to the new definition. So just replace the
3170 rhs of the statement with something harmless. */
3172 if (slp_node)
3173 return true;
3175 if (scalar_dest)
3177 type = TREE_TYPE (scalar_dest);
3178 if (is_pattern_stmt_p (stmt_info))
3179 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3180 else
3181 lhs = gimple_call_lhs (stmt);
3182 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3184 else
3185 new_stmt = gimple_build_nop ();
3186 set_vinfo_for_stmt (new_stmt, stmt_info);
3187 set_vinfo_for_stmt (stmt, NULL);
3188 STMT_VINFO_STMT (stmt_info) = new_stmt;
3189 gsi_replace (gsi, new_stmt, true);
3190 unlink_stmt_vdef (stmt);
3192 return true;
3196 /* Function vect_gen_widened_results_half
3198 Create a vector stmt whose code, number of arguments, and result
3199 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3200 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3201 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3202 needs to be created (DECL is a function-decl of a target-builtin).
3203 STMT is the original scalar stmt that we are vectorizing. */
3205 static gimple
3206 vect_gen_widened_results_half (enum tree_code code,
3207 tree decl,
3208 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3209 tree vec_dest, gimple_stmt_iterator *gsi,
3210 gimple stmt)
3212 gimple new_stmt;
3213 tree new_temp;
3215 /* Generate half of the widened result: */
3216 if (code == CALL_EXPR)
3218 /* Target specific support */
3219 if (op_type == binary_op)
3220 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3221 else
3222 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3223 new_temp = make_ssa_name (vec_dest, new_stmt);
3224 gimple_call_set_lhs (new_stmt, new_temp);
3226 else
3228 /* Generic support */
3229 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3230 if (op_type != binary_op)
3231 vec_oprnd1 = NULL;
3232 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3233 vec_oprnd1);
3234 new_temp = make_ssa_name (vec_dest, new_stmt);
3235 gimple_assign_set_lhs (new_stmt, new_temp);
3237 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3239 return new_stmt;
3243 /* Get vectorized definitions for loop-based vectorization. For the first
3244 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
3245 scalar operand), and for the rest we get a copy with
3246 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3247 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3248 The vectors are collected into VEC_OPRNDS. */
3250 static void
3251 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3252 vec<tree> *vec_oprnds, int multi_step_cvt)
3254 tree vec_oprnd;
3256 /* Get first vector operand. */
3257 /* All the vector operands except the very first one (which is the scalar
3258 operand) are stmt copies. */
3259 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3260 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3261 else
3262 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3264 vec_oprnds->quick_push (vec_oprnd);
3266 /* Get second vector operand. */
3267 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3268 vec_oprnds->quick_push (vec_oprnd);
3270 *oprnd = vec_oprnd;
3272 /* For conversion in multiple steps, continue to get operands
3273 recursively. */
3274 if (multi_step_cvt)
3275 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3279 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3280 For multi-step conversions store the resulting vectors and call the function
3281 recursively. */
3283 static void
3284 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3285 int multi_step_cvt, gimple stmt,
3286 vec<tree> vec_dsts,
3287 gimple_stmt_iterator *gsi,
3288 slp_tree slp_node, enum tree_code code,
3289 stmt_vec_info *prev_stmt_info)
3291 unsigned int i;
3292 tree vop0, vop1, new_tmp, vec_dest;
3293 gimple new_stmt;
3294 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3296 vec_dest = vec_dsts.pop ();
3298 for (i = 0; i < vec_oprnds->length (); i += 2)
3300 /* Create demotion operation. */
3301 vop0 = (*vec_oprnds)[i];
3302 vop1 = (*vec_oprnds)[i + 1];
3303 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3304 new_tmp = make_ssa_name (vec_dest, new_stmt);
3305 gimple_assign_set_lhs (new_stmt, new_tmp);
3306 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3308 if (multi_step_cvt)
3309 /* Store the resulting vector for next recursive call. */
3310 (*vec_oprnds)[i/2] = new_tmp;
3311 else
3313 /* This is the last step of the conversion sequence. Store the
3314 vectors in SLP_NODE or in the vector info of the scalar statement
3315 (or in the STMT_VINFO_RELATED_STMT chain). */
3316 if (slp_node)
3317 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3318 else
3320 if (!*prev_stmt_info)
3321 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3322 else
3323 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3325 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3330 /* For multi-step demotion operations we first generate demotion operations
3331 from the source type to the intermediate types, and then combine the
3332 results (stored in VEC_OPRNDS) in demotion operation to the destination
3333 type. */
3334 if (multi_step_cvt)
3336 /* At each level of recursion we have half of the operands we had at the
3337 previous level. */
3338 vec_oprnds->truncate ((i+1)/2);
3339 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3340 stmt, vec_dsts, gsi, slp_node,
3341 VEC_PACK_TRUNC_EXPR,
3342 prev_stmt_info);
3345 vec_dsts.quick_push (vec_dest);
3349 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3350 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3351 the resulting vectors and call the function recursively. */
3353 static void
3354 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3355 vec<tree> *vec_oprnds1,
3356 gimple stmt, tree vec_dest,
3357 gimple_stmt_iterator *gsi,
3358 enum tree_code code1,
3359 enum tree_code code2, tree decl1,
3360 tree decl2, int op_type)
3362 int i;
3363 tree vop0, vop1, new_tmp1, new_tmp2;
3364 gimple new_stmt1, new_stmt2;
3365 vec<tree> vec_tmp = vNULL;
3367 vec_tmp.create (vec_oprnds0->length () * 2);
3368 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3370 if (op_type == binary_op)
3371 vop1 = (*vec_oprnds1)[i];
3372 else
3373 vop1 = NULL_TREE;
3375 /* Generate the two halves of promotion operation. */
3376 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3377 op_type, vec_dest, gsi, stmt);
3378 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3379 op_type, vec_dest, gsi, stmt);
3380 if (is_gimple_call (new_stmt1))
3382 new_tmp1 = gimple_call_lhs (new_stmt1);
3383 new_tmp2 = gimple_call_lhs (new_stmt2);
3385 else
3387 new_tmp1 = gimple_assign_lhs (new_stmt1);
3388 new_tmp2 = gimple_assign_lhs (new_stmt2);
3391 /* Store the results for the next step. */
3392 vec_tmp.quick_push (new_tmp1);
3393 vec_tmp.quick_push (new_tmp2);
3396 vec_oprnds0->release ();
3397 *vec_oprnds0 = vec_tmp;
3401 /* Check if STMT performs a conversion operation that can be vectorized.
3402 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3403 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3404 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3406 static bool
3407 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3408 gimple *vec_stmt, slp_tree slp_node)
3410 tree vec_dest;
3411 tree scalar_dest;
3412 tree op0, op1 = NULL_TREE;
3413 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3416 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3417 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3418 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3419 tree new_temp;
3420 tree def;
3421 gimple def_stmt;
3422 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3423 gimple new_stmt = NULL;
3424 stmt_vec_info prev_stmt_info;
3425 int nunits_in;
3426 int nunits_out;
3427 tree vectype_out, vectype_in;
3428 int ncopies, i, j;
3429 tree lhs_type, rhs_type;
3430 enum { NARROW, NONE, WIDEN } modifier;
3431 vec<tree> vec_oprnds0 = vNULL;
3432 vec<tree> vec_oprnds1 = vNULL;
3433 tree vop0;
3434 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3435 int multi_step_cvt = 0;
3436 vec<tree> vec_dsts = vNULL;
3437 vec<tree> interm_types = vNULL;
3438 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3439 int op_type;
3440 enum machine_mode rhs_mode;
3441 unsigned short fltsz;
3443 /* Is STMT a vectorizable conversion? */
3445 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3446 return false;
3448 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3449 return false;
3451 if (!is_gimple_assign (stmt))
3452 return false;
3454 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3455 return false;
3457 code = gimple_assign_rhs_code (stmt);
3458 if (!CONVERT_EXPR_CODE_P (code)
3459 && code != FIX_TRUNC_EXPR
3460 && code != FLOAT_EXPR
3461 && code != WIDEN_MULT_EXPR
3462 && code != WIDEN_LSHIFT_EXPR)
3463 return false;
3465 op_type = TREE_CODE_LENGTH (code);
3467 /* Check types of lhs and rhs. */
3468 scalar_dest = gimple_assign_lhs (stmt);
3469 lhs_type = TREE_TYPE (scalar_dest);
3470 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3472 op0 = gimple_assign_rhs1 (stmt);
3473 rhs_type = TREE_TYPE (op0);
3475 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3476 && !((INTEGRAL_TYPE_P (lhs_type)
3477 && INTEGRAL_TYPE_P (rhs_type))
3478 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3479 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3480 return false;
3482 if ((INTEGRAL_TYPE_P (lhs_type)
3483 && (TYPE_PRECISION (lhs_type)
3484 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3485 || (INTEGRAL_TYPE_P (rhs_type)
3486 && (TYPE_PRECISION (rhs_type)
3487 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3489 if (dump_enabled_p ())
3490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3491 "type conversion to/from bit-precision unsupported."
3492 "\n");
3493 return false;
3496 /* Check the operands of the operation. */
3497 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3498 &def_stmt, &def, &dt[0], &vectype_in))
3500 if (dump_enabled_p ())
3501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3502 "use not simple.\n");
3503 return false;
3505 if (op_type == binary_op)
3507 bool ok;
3509 op1 = gimple_assign_rhs2 (stmt);
3510 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3511 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3512 OP1. */
3513 if (CONSTANT_CLASS_P (op0))
3514 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3515 &def_stmt, &def, &dt[1], &vectype_in);
3516 else
3517 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3518 &def, &dt[1]);
3520 if (!ok)
3522 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3524 "use not simple.\n");
3525 return false;
3529 /* If op0 is an external or constant def, use a vector type of
3530 the same size as the output vector type. */
3531 if (!vectype_in)
3532 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3533 if (vec_stmt)
3534 gcc_assert (vectype_in);
3535 if (!vectype_in)
3537 if (dump_enabled_p ())
3539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3540 "no vectype for scalar type ");
3541 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3542 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3545 return false;
3548 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3549 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3550 if (nunits_in < nunits_out)
3551 modifier = NARROW;
3552 else if (nunits_out == nunits_in)
3553 modifier = NONE;
3554 else
3555 modifier = WIDEN;
3557 /* Multiple types in SLP are handled by creating the appropriate number of
3558 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3559 case of SLP. */
3560 if (slp_node || PURE_SLP_STMT (stmt_info))
3561 ncopies = 1;
3562 else if (modifier == NARROW)
3563 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3564 else
3565 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3567 /* Sanity check: make sure that at least one copy of the vectorized stmt
3568 needs to be generated. */
3569 gcc_assert (ncopies >= 1);
3571 /* Supportable by target? */
3572 switch (modifier)
3574 case NONE:
3575 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3576 return false;
3577 if (supportable_convert_operation (code, vectype_out, vectype_in,
3578 &decl1, &code1))
3579 break;
3580 /* FALLTHRU */
3581 unsupported:
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3584 "conversion not supported by target.\n");
3585 return false;
3587 case WIDEN:
3588 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3589 &code1, &code2, &multi_step_cvt,
3590 &interm_types))
3592 /* Binary widening operation can only be supported directly by the
3593 architecture. */
3594 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3595 break;
3598 if (code != FLOAT_EXPR
3599 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3600 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3601 goto unsupported;
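/* A FLOAT_EXPR whose result is wider than its integer source: look for a
wider integer mode to use as an intermediate type, widening the input to
it and then converting (possibly via a further supported widening) to the
float vector type. */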
3603 rhs_mode = TYPE_MODE (rhs_type);
3604 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3605 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3606 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3607 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3609 cvt_type
3610 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3611 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3612 if (cvt_type == NULL_TREE)
3613 goto unsupported;
3615 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3617 if (!supportable_convert_operation (code, vectype_out,
3618 cvt_type, &decl1, &codecvt1))
3619 goto unsupported;
3621 else if (!supportable_widening_operation (code, stmt, vectype_out,
3622 cvt_type, &codecvt1,
3623 &codecvt2, &multi_step_cvt,
3624 &interm_types))
3625 continue;
3626 else
3627 gcc_assert (multi_step_cvt == 0);
3629 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3630 vectype_in, &code1, &code2,
3631 &multi_step_cvt, &interm_types))
3632 break;
3635 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3636 goto unsupported;
3638 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3639 codecvt2 = ERROR_MARK;
3640 else
3642 multi_step_cvt++;
3643 interm_types.safe_push (cvt_type);
3644 cvt_type = NULL_TREE;
3646 break;
3648 case NARROW:
3649 gcc_assert (op_type == unary_op);
3650 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3651 &code1, &multi_step_cvt,
3652 &interm_types))
3653 break;
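/* Direct narrowing is not supported; for FIX_TRUNC_EXPR, try converting
to an integer type of the source's width first and then narrowing that
integer result to the destination type. */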
3655 if (code != FIX_TRUNC_EXPR
3656 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3657 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3658 goto unsupported;
3660 rhs_mode = TYPE_MODE (rhs_type);
3661 cvt_type
3662 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3663 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3664 if (cvt_type == NULL_TREE)
3665 goto unsupported;
3666 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3667 &decl1, &codecvt1))
3668 goto unsupported;
3669 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3670 &code1, &multi_step_cvt,
3671 &interm_types))
3672 break;
3673 goto unsupported;
3675 default:
3676 gcc_unreachable ();
3679 if (!vec_stmt) /* transformation not required. */
3681 if (dump_enabled_p ())
3682 dump_printf_loc (MSG_NOTE, vect_location,
3683 "=== vectorizable_conversion ===\n");
3684 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3686 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3687 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3689 else if (modifier == NARROW)
3691 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3692 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3694 else
3696 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3697 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3699 interm_types.release ();
3700 return true;
3703 /** Transform. **/
3704 if (dump_enabled_p ())
3705 dump_printf_loc (MSG_NOTE, vect_location,
3706 "transform conversion. ncopies = %d.\n", ncopies);
3708 if (op_type == binary_op)
3710 if (CONSTANT_CLASS_P (op0))
3711 op0 = fold_convert (TREE_TYPE (op1), op0);
3712 else if (CONSTANT_CLASS_P (op1))
3713 op1 = fold_convert (TREE_TYPE (op0), op1);
3716 /* In case of multi-step conversion, we first generate conversion operations
3717 to the intermediate types, and then from those types to the final one.
3718 We create vector destinations for the intermediate types (TYPES) received
3719 from supportable_*_operation, and store them in the correct order
3720 for future use in vect_create_vectorized_*_stmts (). */
3721 vec_dsts.create (multi_step_cvt + 1);
3722 vec_dest = vect_create_destination_var (scalar_dest,
3723 (cvt_type && modifier == WIDEN)
3724 ? cvt_type : vectype_out);
3725 vec_dsts.quick_push (vec_dest);
3727 if (multi_step_cvt)
3729 for (i = interm_types.length () - 1;
3730 interm_types.iterate (i, &intermediate_type); i--)
3732 vec_dest = vect_create_destination_var (scalar_dest,
3733 intermediate_type);
3734 vec_dsts.quick_push (vec_dest);
3738 if (cvt_type)
3739 vec_dest = vect_create_destination_var (scalar_dest,
3740 modifier == WIDEN
3741 ? vectype_out : cvt_type);
3743 if (!slp_node)
3745 if (modifier == WIDEN)
3747 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3748 if (op_type == binary_op)
3749 vec_oprnds1.create (1);
3751 else if (modifier == NARROW)
3752 vec_oprnds0.create (
3753 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3755 else if (code == WIDEN_LSHIFT_EXPR)
3756 vec_oprnds1.create (slp_node->vec_stmts_size);
3758 last_oprnd = op0;
3759 prev_stmt_info = NULL;
3760 switch (modifier)
3762 case NONE:
3763 for (j = 0; j < ncopies; j++)
3765 if (j == 0)
3766 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3767 -1);
3768 else
3769 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3771 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3773 /* Arguments are ready, create the new vector stmt. */
3774 if (code1 == CALL_EXPR)
3776 new_stmt = gimple_build_call (decl1, 1, vop0);
3777 new_temp = make_ssa_name (vec_dest, new_stmt);
3778 gimple_call_set_lhs (new_stmt, new_temp);
3780 else
3782 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3783 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3784 vop0, NULL);
3785 new_temp = make_ssa_name (vec_dest, new_stmt);
3786 gimple_assign_set_lhs (new_stmt, new_temp);
3789 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3790 if (slp_node)
3791 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3794 if (j == 0)
3795 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3796 else
3797 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3798 prev_stmt_info = vinfo_for_stmt (new_stmt);
3800 break;
3802 case WIDEN:
3803 /* In case the vectorization factor (VF) is bigger than the number
3804 of elements that we can fit in a vectype (nunits), we have to
3805 generate more than one vector stmt - i.e. - we need to "unroll"
3806 the vector stmt by a factor VF/nunits. */
3807 for (j = 0; j < ncopies; j++)
3809 /* Handle uses. */
3810 if (j == 0)
3812 if (slp_node)
3814 if (code == WIDEN_LSHIFT_EXPR)
3816 unsigned int k;
3818 vec_oprnd1 = op1;
3819 /* Store vec_oprnd1 for every vector stmt to be created
3820 for SLP_NODE. We check during the analysis that all
3821 the shift arguments are the same. */
3822 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3823 vec_oprnds1.quick_push (vec_oprnd1);
3825 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3826 slp_node, -1);
3828 else
3829 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3830 &vec_oprnds1, slp_node, -1);
3832 else
3834 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3835 vec_oprnds0.quick_push (vec_oprnd0);
3836 if (op_type == binary_op)
3838 if (code == WIDEN_LSHIFT_EXPR)
3839 vec_oprnd1 = op1;
3840 else
3841 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3842 NULL);
3843 vec_oprnds1.quick_push (vec_oprnd1);
3847 else
3849 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3850 vec_oprnds0.truncate (0);
3851 vec_oprnds0.quick_push (vec_oprnd0);
3852 if (op_type == binary_op)
3854 if (code == WIDEN_LSHIFT_EXPR)
3855 vec_oprnd1 = op1;
3856 else
3857 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3858 vec_oprnd1);
3859 vec_oprnds1.truncate (0);
3860 vec_oprnds1.quick_push (vec_oprnd1);
3864 /* Arguments are ready. Create the new vector stmts. */
3865 for (i = multi_step_cvt; i >= 0; i--)
3867 tree this_dest = vec_dsts[i];
3868 enum tree_code c1 = code1, c2 = code2;
3869 if (i == 0 && codecvt2 != ERROR_MARK)
3871 c1 = codecvt1;
3872 c2 = codecvt2;
3874 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3875 &vec_oprnds1,
3876 stmt, this_dest, gsi,
3877 c1, c2, decl1, decl2,
3878 op_type);
3881 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3883 if (cvt_type)
3885 if (codecvt1 == CALL_EXPR)
3887 new_stmt = gimple_build_call (decl1, 1, vop0);
3888 new_temp = make_ssa_name (vec_dest, new_stmt);
3889 gimple_call_set_lhs (new_stmt, new_temp);
3891 else
3893 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3894 new_temp = make_ssa_name (vec_dest, NULL);
3895 new_stmt = gimple_build_assign_with_ops (codecvt1,
3896 new_temp,
3897 vop0, NULL);
3900 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3902 else
3903 new_stmt = SSA_NAME_DEF_STMT (vop0);
3905 if (slp_node)
3906 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3907 else
3909 if (!prev_stmt_info)
3910 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3911 else
3912 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3913 prev_stmt_info = vinfo_for_stmt (new_stmt);
3918 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3919 break;
3921 case NARROW:
3922 /* In case the vectorization factor (VF) is bigger than the number
3923 of elements that we can fit in a vectype (nunits), we have to
3924 generate more than one vector stmt - i.e. - we need to "unroll"
3925 the vector stmt by a factor VF/nunits. */
3926 for (j = 0; j < ncopies; j++)
3928 /* Handle uses. */
3929 if (slp_node)
3930 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3931 slp_node, -1);
3932 else
3934 vec_oprnds0.truncate (0);
3935 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3936 vect_pow2 (multi_step_cvt) - 1);
3939 /* Arguments are ready. Create the new vector stmts. */
3940 if (cvt_type)
3941 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3943 if (codecvt1 == CALL_EXPR)
3945 new_stmt = gimple_build_call (decl1, 1, vop0);
3946 new_temp = make_ssa_name (vec_dest, new_stmt);
3947 gimple_call_set_lhs (new_stmt, new_temp);
3949 else
3951 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3952 new_temp = make_ssa_name (vec_dest, NULL);
3953 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3954 vop0, NULL);
3957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3958 vec_oprnds0[i] = new_temp;
3961 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3962 stmt, vec_dsts, gsi,
3963 slp_node, code1,
3964 &prev_stmt_info);
3967 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3968 break;
3971 vec_oprnds0.release ();
3972 vec_oprnds1.release ();
3973 vec_dsts.release ();
3974 interm_types.release ();
3976 return true;
3980 /* Function vectorizable_assignment.
3982 Check if STMT performs an assignment (copy) that can be vectorized.
3983 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3984 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3985 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3987 static bool
3988 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3989 gimple *vec_stmt, slp_tree slp_node)
3991 tree vec_dest;
3992 tree scalar_dest;
3993 tree op;
3994 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3996 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3997 tree new_temp;
3998 tree def;
3999 gimple def_stmt;
4000 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4001 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4002 int ncopies;
4003 int i, j;
4004 vec<tree> vec_oprnds = vNULL;
4005 tree vop;
4006 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4007 gimple new_stmt = NULL;
4008 stmt_vec_info prev_stmt_info = NULL;
4009 enum tree_code code;
4010 tree vectype_in;
4012 /* Multiple types in SLP are handled by creating the appropriate number of
4013 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4014 case of SLP. */
4015 if (slp_node || PURE_SLP_STMT (stmt_info))
4016 ncopies = 1;
4017 else
4018 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4020 gcc_assert (ncopies >= 1);
4022 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4023 return false;
4025 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4026 return false;
4028 /* Is vectorizable assignment? */
4029 if (!is_gimple_assign (stmt))
4030 return false;
4032 scalar_dest = gimple_assign_lhs (stmt);
4033 if (TREE_CODE (scalar_dest) != SSA_NAME)
4034 return false;
4036 code = gimple_assign_rhs_code (stmt);
4037 if (gimple_assign_single_p (stmt)
4038 || code == PAREN_EXPR
4039 || CONVERT_EXPR_CODE_P (code))
4040 op = gimple_assign_rhs1 (stmt);
4041 else
4042 return false;
4044 if (code == VIEW_CONVERT_EXPR)
4045 op = TREE_OPERAND (op, 0);
4047 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4048 &def_stmt, &def, &dt[0], &vectype_in))
4050 if (dump_enabled_p ())
4051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4052 "use not simple.\n");
4053 return false;
4056 /* We can handle NOP_EXPR conversions that do not change the number
4057 of elements or the vector size. */
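 /* For instance, a cast between int and unsigned int (or between pointers
    and pointer-sized integers) keeps both the element count and the vector
    size, so it is treated here as a plain vector copy, realized below with
    a VIEW_CONVERT_EXPR on the vectorized operand.  */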
4058 if ((CONVERT_EXPR_CODE_P (code)
4059 || code == VIEW_CONVERT_EXPR)
4060 && (!vectype_in
4061 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4062 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4063 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4064 return false;
4066 /* We do not handle bit-precision changes. */
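 /* Illustration: converting an int to a 3-bit bit-field type would require
    an extra truncation and is rejected here, while widening from an
    unsigned bit-field to int leaves the bit-pattern unchanged and is
    accepted by the exception below.  */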
4067 if ((CONVERT_EXPR_CODE_P (code)
4068 || code == VIEW_CONVERT_EXPR)
4069 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4070 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4071 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4072 || ((TYPE_PRECISION (TREE_TYPE (op))
4073 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4074 /* But a conversion that does not change the bit-pattern is ok. */
4075 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4076 > TYPE_PRECISION (TREE_TYPE (op)))
4077 && TYPE_UNSIGNED (TREE_TYPE (op))))
4079 if (dump_enabled_p ())
4080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4081 "type conversion to/from bit-precision "
4082 "unsupported.\n");
4083 return false;
4086 if (!vec_stmt) /* transformation not required. */
4088 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4089 if (dump_enabled_p ())
4090 dump_printf_loc (MSG_NOTE, vect_location,
4091 "=== vectorizable_assignment ===\n");
4092 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4093 return true;
4096 /** Transform. **/
4097 if (dump_enabled_p ())
4098 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4100 /* Handle def. */
4101 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4103 /* Handle use. */
4104 for (j = 0; j < ncopies; j++)
4106 /* Handle uses. */
4107 if (j == 0)
4108 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4109 else
4110 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4112 /* Arguments are ready. Create the new vector stmt. */
4113 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4115 if (CONVERT_EXPR_CODE_P (code)
4116 || code == VIEW_CONVERT_EXPR)
4117 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4118 new_stmt = gimple_build_assign (vec_dest, vop);
4119 new_temp = make_ssa_name (vec_dest, new_stmt);
4120 gimple_assign_set_lhs (new_stmt, new_temp);
4121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4122 if (slp_node)
4123 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4126 if (slp_node)
4127 continue;
4129 if (j == 0)
4130 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4131 else
4132 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4134 prev_stmt_info = vinfo_for_stmt (new_stmt);
4137 vec_oprnds.release ();
4138 return true;
4142 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4143 either as shift by a scalar or by a vector. */
4145 bool
4146 vect_supportable_shift (enum tree_code code, tree scalar_type)
4149 enum machine_mode vec_mode;
4150 optab optab;
4151 int icode;
4152 tree vectype;
4154 vectype = get_vectype_for_scalar_type (scalar_type);
4155 if (!vectype)
4156 return false;
4158 optab = optab_for_tree_code (code, vectype, optab_scalar);
4159 if (!optab
4160 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4162 optab = optab_for_tree_code (code, vectype, optab_vector);
4163 if (!optab
4164 || (optab_handler (optab, TYPE_MODE (vectype))
4165 == CODE_FOR_nothing))
4166 return false;
4169 vec_mode = TYPE_MODE (vectype);
4170 icode = (int) optab_handler (optab, vec_mode);
4171 if (icode == CODE_FOR_nothing)
4172 return false;
4174 return true;
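 /* Note: this only reports whether some shift instruction (vector/scalar or
    vector/vector form) exists for SCALAR_TYPE; callers outside this
    function (e.g. pattern recognition) can use it to check that a shift
    they want to emit is supported, before vectorizable_shift later selects
    the exact optab.  */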
4178 /* Function vectorizable_shift.
4180 Check if STMT performs a shift operation that can be vectorized.
4181 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4182 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4183 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4185 static bool
4186 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4187 gimple *vec_stmt, slp_tree slp_node)
4189 tree vec_dest;
4190 tree scalar_dest;
4191 tree op0, op1 = NULL;
4192 tree vec_oprnd1 = NULL_TREE;
4193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4194 tree vectype;
4195 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4196 enum tree_code code;
4197 enum machine_mode vec_mode;
4198 tree new_temp;
4199 optab optab;
4200 int icode;
4201 enum machine_mode optab_op2_mode;
4202 tree def;
4203 gimple def_stmt;
4204 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4205 gimple new_stmt = NULL;
4206 stmt_vec_info prev_stmt_info;
4207 int nunits_in;
4208 int nunits_out;
4209 tree vectype_out;
4210 tree op1_vectype;
4211 int ncopies;
4212 int j, i;
4213 vec<tree> vec_oprnds0 = vNULL;
4214 vec<tree> vec_oprnds1 = vNULL;
4215 tree vop0, vop1;
4216 unsigned int k;
4217 bool scalar_shift_arg = true;
4218 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4219 int vf;
4221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4222 return false;
4224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4225 return false;
4227 /* Is STMT a vectorizable binary/unary operation? */
4228 if (!is_gimple_assign (stmt))
4229 return false;
4231 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4232 return false;
4234 code = gimple_assign_rhs_code (stmt);
4236 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4237 || code == RROTATE_EXPR))
4238 return false;
4240 scalar_dest = gimple_assign_lhs (stmt);
4241 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4242 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4243 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4247 "bit-precision shifts not supported.\n");
4248 return false;
4251 op0 = gimple_assign_rhs1 (stmt);
4252 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4253 &def_stmt, &def, &dt[0], &vectype))
4255 if (dump_enabled_p ())
4256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4257 "use not simple.\n");
4258 return false;
4260 /* If op0 is an external or constant def, use a vector type with
4261 the same size as the output vector type. */
4262 if (!vectype)
4263 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4264 if (vec_stmt)
4265 gcc_assert (vectype);
4266 if (!vectype)
4268 if (dump_enabled_p ())
4269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4270 "no vectype for scalar type\n");
4271 return false;
4274 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4275 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4276 if (nunits_out != nunits_in)
4277 return false;
4279 op1 = gimple_assign_rhs2 (stmt);
4280 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4281 &def, &dt[1], &op1_vectype))
4283 if (dump_enabled_p ())
4284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4285 "use not simple.\n");
4286 return false;
4289 if (loop_vinfo)
4290 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4291 else
4292 vf = 1;
4294 /* Multiple types in SLP are handled by creating the appropriate number of
4295 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4296 case of SLP. */
4297 if (slp_node || PURE_SLP_STMT (stmt_info))
4298 ncopies = 1;
4299 else
4300 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4302 gcc_assert (ncopies >= 1);
4304 /* Determine whether the shift amount is a vector or a scalar. If the
4305 shift/rotate amount is a vector, use the vector/vector shift optabs. */
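 /* For example, a[i] << b[i] has an internal (vector) shift amount and
    needs the vector/vector optab, while a[i] << n with n constant or loop
    invariant is a scalar shift and can try the vector/scalar optab
    first.  */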
4307 if (dt[1] == vect_internal_def && !slp_node)
4308 scalar_shift_arg = false;
4309 else if (dt[1] == vect_constant_def
4310 || dt[1] == vect_external_def
4311 || dt[1] == vect_internal_def)
4313 /* In SLP, we need to check whether the shift count is the same for
4314 all statements; in loops, a constant or invariant shift count is
4315 always a scalar shift. */
4316 if (slp_node)
4318 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4319 gimple slpstmt;
4321 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4322 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4323 scalar_shift_arg = false;
4326 else
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "operand mode requires invariant argument.\n");
4331 return false;
4334 /* Vector shifted by vector. */
4335 if (!scalar_shift_arg)
4337 optab = optab_for_tree_code (code, vectype, optab_vector);
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_NOTE, vect_location,
4340 "vector/vector shift/rotate found.\n");
4342 if (!op1_vectype)
4343 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4344 if (op1_vectype == NULL_TREE
4345 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4347 if (dump_enabled_p ())
4348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4349 "unusable type for last operand in"
4350 " vector/vector shift/rotate.\n");
4351 return false;
4354 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
4355 see if it has a vector-shifted-by-vector insn. */
4356 else
4358 optab = optab_for_tree_code (code, vectype, optab_scalar);
4359 if (optab
4360 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4362 if (dump_enabled_p ())
4363 dump_printf_loc (MSG_NOTE, vect_location,
4364 "vector/scalar shift/rotate found.\n");
4366 else
4368 optab = optab_for_tree_code (code, vectype, optab_vector);
4369 if (optab
4370 && (optab_handler (optab, TYPE_MODE (vectype))
4371 != CODE_FOR_nothing))
4373 scalar_shift_arg = false;
4375 if (dump_enabled_p ())
4376 dump_printf_loc (MSG_NOTE, vect_location,
4377 "vector/vector shift/rotate found.\n");
4379 /* Unlike the other binary operators, shifts/rotates have
4380 an rhs of type int rather than the same type as the lhs,
4381 so make sure the scalar has the right type if we are
4382 dealing with vectors of long long/long/short/char. */
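 /* E.g. when a vector of long long is shifted by the constant 3, the int
    constant is converted to a long long 3 below, so that the invariant
    vector built from it has elements of the same mode as the shifted
    operand.  */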
4383 if (dt[1] == vect_constant_def)
4384 op1 = fold_convert (TREE_TYPE (vectype), op1);
4385 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4386 TREE_TYPE (op1)))
4388 if (slp_node
4389 && TYPE_MODE (TREE_TYPE (vectype))
4390 != TYPE_MODE (TREE_TYPE (op1)))
4392 if (dump_enabled_p ())
4393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4394 "unusable type for last operand in"
4395 " vector/vector shift/rotate.\n");
4396 return false;
4398 if (vec_stmt && !slp_node)
4400 op1 = fold_convert (TREE_TYPE (vectype), op1);
4401 op1 = vect_init_vector (stmt, op1,
4402 TREE_TYPE (vectype), NULL);
4409 /* Supportable by target? */
4410 if (!optab)
4412 if (dump_enabled_p ())
4413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4414 "no optab.\n");
4415 return false;
4417 vec_mode = TYPE_MODE (vectype);
4418 icode = (int) optab_handler (optab, vec_mode);
4419 if (icode == CODE_FOR_nothing)
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4423 "op not supported by target.\n");
4424 /* Check only during analysis. */
4425 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4426 || (vf < vect_min_worthwhile_factor (code)
4427 && !vec_stmt))
4428 return false;
4429 if (dump_enabled_p ())
4430 dump_printf_loc (MSG_NOTE, vect_location,
4431 "proceeding using word mode.\n");
4434 /* Worthwhile without SIMD support? Check only during analysis. */
4435 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4436 && vf < vect_min_worthwhile_factor (code)
4437 && !vec_stmt)
4439 if (dump_enabled_p ())
4440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4441 "not worthwhile without SIMD support.\n");
4442 return false;
4445 if (!vec_stmt) /* transformation not required. */
4447 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4448 if (dump_enabled_p ())
4449 dump_printf_loc (MSG_NOTE, vect_location,
4450 "=== vectorizable_shift ===\n");
4451 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4452 return true;
4455 /** Transform. **/
4457 if (dump_enabled_p ())
4458 dump_printf_loc (MSG_NOTE, vect_location,
4459 "transform binary/unary operation.\n");
4461 /* Handle def. */
4462 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4464 prev_stmt_info = NULL;
4465 for (j = 0; j < ncopies; j++)
4467 /* Handle uses. */
4468 if (j == 0)
4470 if (scalar_shift_arg)
4472 /* Vector shl and shr insn patterns can be defined with scalar
4473 operand 2 (shift operand). In this case, use constant or loop
4474 invariant op1 directly, without extending it to vector mode
4475 first. */
4476 optab_op2_mode = insn_data[icode].operand[2].mode;
4477 if (!VECTOR_MODE_P (optab_op2_mode))
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_NOTE, vect_location,
4481 "operand 1 using scalar mode.\n");
4482 vec_oprnd1 = op1;
4483 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4484 vec_oprnds1.quick_push (vec_oprnd1);
4485 if (slp_node)
4487 /* Store vec_oprnd1 for every vector stmt to be created
4488 for SLP_NODE. We check during the analysis that all
4489 the shift arguments are the same.
4490 TODO: Allow different constants for different vector
4491 stmts generated for an SLP instance. */
4492 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4493 vec_oprnds1.quick_push (vec_oprnd1);
4498 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4499 (a special case for certain kinds of vector shifts); otherwise,
4500 operand 1 should be of a vector type (the usual case). */
4501 if (vec_oprnd1)
4502 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4503 slp_node, -1);
4504 else
4505 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4506 slp_node, -1);
4508 else
4509 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4511 /* Arguments are ready. Create the new vector stmt. */
4512 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4514 vop1 = vec_oprnds1[i];
4515 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4516 new_temp = make_ssa_name (vec_dest, new_stmt);
4517 gimple_assign_set_lhs (new_stmt, new_temp);
4518 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4519 if (slp_node)
4520 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4523 if (slp_node)
4524 continue;
4526 if (j == 0)
4527 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4528 else
4529 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4530 prev_stmt_info = vinfo_for_stmt (new_stmt);
4533 vec_oprnds0.release ();
4534 vec_oprnds1.release ();
4536 return true;
4540 /* Function vectorizable_operation.
4542 Check if STMT performs a binary, unary or ternary operation that can
4543 be vectorized.
4544 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4545 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4546 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4548 static bool
4549 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4550 gimple *vec_stmt, slp_tree slp_node)
4552 tree vec_dest;
4553 tree scalar_dest;
4554 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4555 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4556 tree vectype;
4557 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4558 enum tree_code code;
4559 enum machine_mode vec_mode;
4560 tree new_temp;
4561 int op_type;
4562 optab optab;
4563 int icode;
4564 tree def;
4565 gimple def_stmt;
4566 enum vect_def_type dt[3]
4567 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4568 gimple new_stmt = NULL;
4569 stmt_vec_info prev_stmt_info;
4570 int nunits_in;
4571 int nunits_out;
4572 tree vectype_out;
4573 int ncopies;
4574 int j, i;
4575 vec<tree> vec_oprnds0 = vNULL;
4576 vec<tree> vec_oprnds1 = vNULL;
4577 vec<tree> vec_oprnds2 = vNULL;
4578 tree vop0, vop1, vop2;
4579 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4580 int vf;
4582 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4583 return false;
4585 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4586 return false;
4588 /* Is STMT a vectorizable binary/unary operation? */
4589 if (!is_gimple_assign (stmt))
4590 return false;
4592 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4593 return false;
4595 code = gimple_assign_rhs_code (stmt);
4597 /* For pointer addition, we should use the normal plus for
4598 the vector addition. */
4599 if (code == POINTER_PLUS_EXPR)
4600 code = PLUS_EXPR;
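 /* For example, a pointer increment such as q_2 = p_1 + 16 is then
    vectorized with an ordinary PLUS_EXPR on the pointer-sized vector
    elements; there is no vector form of POINTER_PLUS_EXPR.  */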
4602 /* Support only unary, binary or ternary operations. */
4603 op_type = TREE_CODE_LENGTH (code);
4604 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4606 if (dump_enabled_p ())
4607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4608 "num. args = %d (not unary/binary/ternary op).\n",
4609 op_type);
4610 return false;
4613 scalar_dest = gimple_assign_lhs (stmt);
4614 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4616 /* Most operations cannot handle bit-precision types without extra
4617 truncations. */
4618 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4619 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4620 /* Exceptions are bitwise binary operations. */
4621 && code != BIT_IOR_EXPR
4622 && code != BIT_XOR_EXPR
4623 && code != BIT_AND_EXPR)
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4627 "bit-precision arithmetic not supported.\n");
4628 return false;
4631 op0 = gimple_assign_rhs1 (stmt);
4632 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4633 &def_stmt, &def, &dt[0], &vectype))
4635 if (dump_enabled_p ())
4636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4637 "use not simple.\n");
4638 return false;
4640 /* If op0 is an external or constant def, use a vector type with
4641 the same size as the output vector type. */
4642 if (!vectype)
4643 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4644 if (vec_stmt)
4645 gcc_assert (vectype);
4646 if (!vectype)
4648 if (dump_enabled_p ())
4650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4651 "no vectype for scalar type ");
4652 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4653 TREE_TYPE (op0));
4654 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4657 return false;
4660 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4661 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4662 if (nunits_out != nunits_in)
4663 return false;
4665 if (op_type == binary_op || op_type == ternary_op)
4667 op1 = gimple_assign_rhs2 (stmt);
4668 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4669 &def, &dt[1]))
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "use not simple.\n");
4674 return false;
4677 if (op_type == ternary_op)
4679 op2 = gimple_assign_rhs3 (stmt);
4680 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4681 &def, &dt[2]))
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4685 "use not simple.\n");
4686 return false;
4690 if (loop_vinfo)
4691 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4692 else
4693 vf = 1;
4695 /* Multiple types in SLP are handled by creating the appropriate number of
4696 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4697 case of SLP. */
4698 if (slp_node || PURE_SLP_STMT (stmt_info))
4699 ncopies = 1;
4700 else
4701 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4703 gcc_assert (ncopies >= 1);
4705 /* Shifts are handled in vectorizable_shift (). */
4706 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4707 || code == RROTATE_EXPR)
4708 return false;
4710 /* Supportable by target? */
4712 vec_mode = TYPE_MODE (vectype);
4713 if (code == MULT_HIGHPART_EXPR)
4715 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4716 icode = LAST_INSN_CODE;
4717 else
4718 icode = CODE_FOR_nothing;
4720 else
4722 optab = optab_for_tree_code (code, vectype, optab_default);
4723 if (!optab)
4725 if (dump_enabled_p ())
4726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4727 "no optab.\n");
4728 return false;
4730 icode = (int) optab_handler (optab, vec_mode);
4733 if (icode == CODE_FOR_nothing)
4735 if (dump_enabled_p ())
4736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4737 "op not supported by target.\n");
4738 /* Check only during analysis. */
4739 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4740 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4741 return false;
4742 if (dump_enabled_p ())
4743 dump_printf_loc (MSG_NOTE, vect_location,
4744 "proceeding using word mode.\n");
4747 /* Worthwhile without SIMD support? Check only during analysis. */
4748 if (!VECTOR_MODE_P (vec_mode)
4749 && !vec_stmt
4750 && vf < vect_min_worthwhile_factor (code))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4754 "not worthwhile without SIMD support.\n");
4755 return false;
4758 if (!vec_stmt) /* transformation not required. */
4760 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4761 if (dump_enabled_p ())
4762 dump_printf_loc (MSG_NOTE, vect_location,
4763 "=== vectorizable_operation ===\n");
4764 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4765 return true;
4768 /** Transform. **/
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE, vect_location,
4772 "transform binary/unary operation.\n");
4774 /* Handle def. */
4775 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4777 /* In case the vectorization factor (VF) is bigger than the number
4778 of elements that we can fit in a vectype (nunits), we have to generate
4779 more than one vector stmt - i.e. - we need to "unroll" the
4780 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4781 from one copy of the vector stmt to the next, in the field
4782 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4783 stages to find the correct vector defs to be used when vectorizing
4784 stmts that use the defs of the current stmt. The example below
4785 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4786 we need to create 4 vectorized stmts):
4788 before vectorization:
4789 RELATED_STMT VEC_STMT
4790 S1: x = memref - -
4791 S2: z = x + 1 - -
4793 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4794 there):
4795 RELATED_STMT VEC_STMT
4796 VS1_0: vx0 = memref0 VS1_1 -
4797 VS1_1: vx1 = memref1 VS1_2 -
4798 VS1_2: vx2 = memref2 VS1_3 -
4799 VS1_3: vx3 = memref3 - -
4800 S1: x = load - VS1_0
4801 S2: z = x + 1 - -
4803 step2: vectorize stmt S2 (done here):
4804 To vectorize stmt S2 we first need to find the relevant vector
4805 def for the first operand 'x'. This is, as usual, obtained from
4806 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4807 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4808 relevant vector def 'vx0'. Having found 'vx0' we can generate
4809 the vector stmt VS2_0, and as usual, record it in the
4810 STMT_VINFO_VEC_STMT of stmt S2.
4811 When creating the second copy (VS2_1), we obtain the relevant vector
4812 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4813 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4814 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4815 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4816 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4817 chain of stmts and pointers:
4818 RELATED_STMT VEC_STMT
4819 VS1_0: vx0 = memref0 VS1_1 -
4820 VS1_1: vx1 = memref1 VS1_2 -
4821 VS1_2: vx2 = memref2 VS1_3 -
4822 VS1_3: vx3 = memref3 - -
4823 S1: x = load - VS1_0
4824 VS2_0: vz0 = vx0 + v1 VS2_1 -
4825 VS2_1: vz1 = vx1 + v1 VS2_2 -
4826 VS2_2: vz2 = vx2 + v1 VS2_3 -
4827 VS2_3: vz3 = vx3 + v1 - -
4828 S2: z = x + 1 - VS2_0 */
4830 prev_stmt_info = NULL;
4831 for (j = 0; j < ncopies; j++)
4833 /* Handle uses. */
4834 if (j == 0)
4836 if (op_type == binary_op || op_type == ternary_op)
4837 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4838 slp_node, -1);
4839 else
4840 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4841 slp_node, -1);
4842 if (op_type == ternary_op)
4844 vec_oprnds2.create (1);
4845 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4846 stmt,
4847 NULL));
4850 else
4852 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4853 if (op_type == ternary_op)
4855 tree vec_oprnd = vec_oprnds2.pop ();
4856 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4857 vec_oprnd));
4861 /* Arguments are ready. Create the new vector stmt. */
4862 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4864 vop1 = ((op_type == binary_op || op_type == ternary_op)
4865 ? vec_oprnds1[i] : NULL_TREE);
4866 vop2 = ((op_type == ternary_op)
4867 ? vec_oprnds2[i] : NULL_TREE);
4868 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4869 vop0, vop1, vop2);
4870 new_temp = make_ssa_name (vec_dest, new_stmt);
4871 gimple_assign_set_lhs (new_stmt, new_temp);
4872 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4873 if (slp_node)
4874 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4877 if (slp_node)
4878 continue;
4880 if (j == 0)
4881 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4882 else
4883 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4884 prev_stmt_info = vinfo_for_stmt (new_stmt);
4887 vec_oprnds0.release ();
4888 vec_oprnds1.release ();
4889 vec_oprnds2.release ();
4891 return true;
4894 /* A helper function to ensure data reference DR's base alignment
4895 for STMT_INFO. */
4897 static void
4898 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4900 if (!dr->aux)
4901 return;
4903 if (((dataref_aux *)dr->aux)->base_misaligned)
4905 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4906 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4908 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4909 DECL_USER_ALIGN (base_decl) = 1;
4910 ((dataref_aux *)dr->aux)->base_misaligned = false;
4915 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4916 reversal of the vector elements. If that is impossible to do,
4917 returns NULL. */
4919 static tree
4920 perm_mask_for_reverse (tree vectype)
4922 int i, nunits;
4923 unsigned char *sel;
4925 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4926 sel = XALLOCAVEC (unsigned char, nunits);
4928 for (i = 0; i < nunits; ++i)
4929 sel[i] = nunits - 1 - i;
4931 return vect_gen_perm_mask (vectype, sel);
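 /* For instance, with a 4-element VECTYPE the selector is {3, 2, 1, 0}, so
    the returned mask reverses the vector end-for-end; NULL is returned when
    can_vec_perm_p says the target cannot perform that permutation.  */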
4934 /* Function vectorizable_store.
4936 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4937 can be vectorized.
4938 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4939 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4940 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4942 static bool
4943 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4944 slp_tree slp_node)
4946 tree scalar_dest;
4947 tree data_ref;
4948 tree op;
4949 tree vec_oprnd = NULL_TREE;
4950 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4951 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4952 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4953 tree elem_type;
4954 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4955 struct loop *loop = NULL;
4956 enum machine_mode vec_mode;
4957 tree dummy;
4958 enum dr_alignment_support alignment_support_scheme;
4959 tree def;
4960 gimple def_stmt;
4961 enum vect_def_type dt;
4962 stmt_vec_info prev_stmt_info = NULL;
4963 tree dataref_ptr = NULL_TREE;
4964 tree dataref_offset = NULL_TREE;
4965 gimple ptr_incr = NULL;
4966 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4967 int ncopies;
4968 int j;
4969 gimple next_stmt, first_stmt = NULL;
4970 bool grouped_store = false;
4971 bool store_lanes_p = false;
4972 unsigned int group_size, i;
4973 vec<tree> dr_chain = vNULL;
4974 vec<tree> oprnds = vNULL;
4975 vec<tree> result_chain = vNULL;
4976 bool inv_p;
4977 bool negative = false;
4978 tree offset = NULL_TREE;
4979 vec<tree> vec_oprnds = vNULL;
4980 bool slp = (slp_node != NULL);
4981 unsigned int vec_num;
4982 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4983 tree aggr_type;
4985 if (loop_vinfo)
4986 loop = LOOP_VINFO_LOOP (loop_vinfo);
4988 /* Multiple types in SLP are handled by creating the appropriate number of
4989 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4990 case of SLP. */
4991 if (slp || PURE_SLP_STMT (stmt_info))
4992 ncopies = 1;
4993 else
4994 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4996 gcc_assert (ncopies >= 1);
4998 /* FORNOW. This restriction should be relaxed. */
4999 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5001 if (dump_enabled_p ())
5002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5003 "multiple types in nested loop.\n");
5004 return false;
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5008 return false;
5010 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5011 return false;
5013 /* Is vectorizable store? */
5015 if (!is_gimple_assign (stmt))
5016 return false;
5018 scalar_dest = gimple_assign_lhs (stmt);
5019 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5020 && is_pattern_stmt_p (stmt_info))
5021 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5022 if (TREE_CODE (scalar_dest) != ARRAY_REF
5023 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5024 && TREE_CODE (scalar_dest) != INDIRECT_REF
5025 && TREE_CODE (scalar_dest) != COMPONENT_REF
5026 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5027 && TREE_CODE (scalar_dest) != REALPART_EXPR
5028 && TREE_CODE (scalar_dest) != MEM_REF)
5029 return false;
5031 gcc_assert (gimple_assign_single_p (stmt));
5032 op = gimple_assign_rhs1 (stmt);
5033 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5034 &def, &dt))
5036 if (dump_enabled_p ())
5037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5038 "use not simple.\n");
5039 return false;
5042 elem_type = TREE_TYPE (vectype);
5043 vec_mode = TYPE_MODE (vectype);
5045 /* FORNOW. In some cases we can vectorize even if the data-type is not
5046 supported (e.g. array initialization with 0). */
5047 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5048 return false;
5050 if (!STMT_VINFO_DATA_REF (stmt_info))
5051 return false;
5053 negative =
5054 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5055 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5056 size_zero_node) < 0;
5057 if (negative && ncopies > 1)
5059 if (dump_enabled_p ())
5060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5061 "multiple types with negative step.\n");
5062 return false;
5065 if (negative)
5067 gcc_assert (!grouped_store);
5068 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5069 if (alignment_support_scheme != dr_aligned
5070 && alignment_support_scheme != dr_unaligned_supported)
5072 if (dump_enabled_p ())
5073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5074 "negative step but alignment required.\n");
5075 return false;
5077 if (dt != vect_constant_def
5078 && dt != vect_external_def
5079 && !perm_mask_for_reverse (vectype))
5081 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5083 "negative step and reversing not supported.\n");
5084 return false;
5088 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5090 grouped_store = true;
5091 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5092 if (!slp && !PURE_SLP_STMT (stmt_info))
5094 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5095 if (vect_store_lanes_supported (vectype, group_size))
5096 store_lanes_p = true;
5097 else if (!vect_grouped_store_supported (vectype, group_size))
5098 return false;
5101 if (first_stmt == stmt)
5103 /* STMT is the leader of the group. Check the operands of all the
5104 stmts of the group. */
5105 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5106 while (next_stmt)
5108 gcc_assert (gimple_assign_single_p (next_stmt));
5109 op = gimple_assign_rhs1 (next_stmt);
5110 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5111 &def_stmt, &def, &dt))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "use not simple.\n");
5116 return false;
5118 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5123 if (!vec_stmt) /* transformation not required. */
5125 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5126 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5127 NULL, NULL, NULL);
5128 return true;
5131 /** Transform. **/
5133 ensure_base_align (stmt_info, dr);
5135 if (grouped_store)
5137 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5138 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5140 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5142 /* FORNOW */
5143 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5145 /* We vectorize all the stmts of the interleaving group when we
5146 reach the last stmt in the group. */
5147 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5148 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5149 && !slp)
5151 *vec_stmt = NULL;
5152 return true;
5155 if (slp)
5157 grouped_store = false;
5158 /* VEC_NUM is the number of vect stmts to be created for this
5159 group. */
5160 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5161 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5162 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5163 op = gimple_assign_rhs1 (first_stmt);
5165 else
5166 /* VEC_NUM is the number of vect stmts to be created for this
5167 group. */
5168 vec_num = group_size;
5170 else
5172 first_stmt = stmt;
5173 first_dr = dr;
5174 group_size = vec_num = 1;
5177 if (dump_enabled_p ())
5178 dump_printf_loc (MSG_NOTE, vect_location,
5179 "transform store. ncopies = %d\n", ncopies);
5181 dr_chain.create (group_size);
5182 oprnds.create (group_size);
5184 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5185 gcc_assert (alignment_support_scheme);
5186 /* Targets with store-lane instructions must not require explicit
5187 realignment. */
5188 gcc_assert (!store_lanes_p
5189 || alignment_support_scheme == dr_aligned
5190 || alignment_support_scheme == dr_unaligned_supported);
5192 if (negative)
5193 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5195 if (store_lanes_p)
5196 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5197 else
5198 aggr_type = vectype;
5200 /* In case the vectorization factor (VF) is bigger than the number
5201 of elements that we can fit in a vectype (nunits), we have to generate
5202 more than one vector stmt - i.e. - we need to "unroll" the
5203 vector stmt by a factor VF/nunits. For more details see documentation in
5204 vect_get_vec_def_for_copy_stmt. */
5206 /* In case of interleaving (non-unit grouped access):
5208 S1: &base + 2 = x2
5209 S2: &base = x0
5210 S3: &base + 1 = x1
5211 S4: &base + 3 = x3
5213 We create vectorized stores starting from base address (the access of the
5214 first stmt in the chain (S2 in the above example), when the last store stmt
5215 of the chain (S4) is reached:
5217 VS1: &base = vx2
5218 VS2: &base + vec_size*1 = vx0
5219 VS3: &base + vec_size*2 = vx1
5220 VS4: &base + vec_size*3 = vx3
5222 Then permutation statements are generated:
5224 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5225 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5228 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5229 (the order of the data-refs in the output of vect_permute_store_chain
5230 corresponds to the order of scalar stmts in the interleaving chain - see
5231 the documentation of vect_permute_store_chain()).
5233 In case of both multiple types and interleaving, above vector stores and
5234 permutation stmts are created for every copy. The result vector stmts are
5235 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5236 STMT_VINFO_RELATED_STMT for the next copies.
5239 prev_stmt_info = NULL;
5240 for (j = 0; j < ncopies; j++)
5242 gimple new_stmt;
5244 if (j == 0)
5246 if (slp)
5248 /* Get vectorized arguments for SLP_NODE. */
5249 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5250 NULL, slp_node, -1);
5252 vec_oprnd = vec_oprnds[0];
5254 else
5256 /* For interleaved stores we collect vectorized defs for all the
5257 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5258 used as an input to vect_permute_store_chain(), and OPRNDS as
5259 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5261 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5262 OPRNDS are of size 1. */
5263 next_stmt = first_stmt;
5264 for (i = 0; i < group_size; i++)
5266 /* Since gaps are not supported for interleaved stores,
5267 GROUP_SIZE is the exact number of stmts in the chain.
5268 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5269 there is no interleaving, GROUP_SIZE is 1, and only one
5270 iteration of the loop will be executed. */
5271 gcc_assert (next_stmt
5272 && gimple_assign_single_p (next_stmt));
5273 op = gimple_assign_rhs1 (next_stmt);
5275 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5276 NULL);
5277 dr_chain.quick_push (vec_oprnd);
5278 oprnds.quick_push (vec_oprnd);
5279 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5283 /* We should have caught mismatched types earlier. */
5284 gcc_assert (useless_type_conversion_p (vectype,
5285 TREE_TYPE (vec_oprnd)));
5286 bool simd_lane_access_p
5287 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5288 if (simd_lane_access_p
5289 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5290 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5291 && integer_zerop (DR_OFFSET (first_dr))
5292 && integer_zerop (DR_INIT (first_dr))
5293 && alias_sets_conflict_p (get_alias_set (aggr_type),
5294 get_alias_set (DR_REF (first_dr))))
5296 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5297 dataref_offset = build_int_cst (reference_alias_ptr_type
5298 (DR_REF (first_dr)), 0);
5299 inv_p = false;
5301 else
5302 dataref_ptr
5303 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5304 simd_lane_access_p ? loop : NULL,
5305 offset, &dummy, gsi, &ptr_incr,
5306 simd_lane_access_p, &inv_p);
5307 gcc_assert (bb_vinfo || !inv_p);
5309 else
5311 /* For interleaved stores we created vectorized defs for all the
5312 defs stored in OPRNDS in the previous iteration (previous copy).
5313 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5314 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5315 next copy.
5316 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5317 OPRNDS are of size 1. */
5318 for (i = 0; i < group_size; i++)
5320 op = oprnds[i];
5321 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5322 &def, &dt);
5323 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5324 dr_chain[i] = vec_oprnd;
5325 oprnds[i] = vec_oprnd;
5327 if (dataref_offset)
5328 dataref_offset
5329 = int_const_binop (PLUS_EXPR, dataref_offset,
5330 TYPE_SIZE_UNIT (aggr_type));
5331 else
5332 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5333 TYPE_SIZE_UNIT (aggr_type));
5336 if (store_lanes_p)
5338 tree vec_array;
5340 /* Combine all the vectors into an array. */
5341 vec_array = create_vector_array (vectype, vec_num);
5342 for (i = 0; i < vec_num; i++)
5344 vec_oprnd = dr_chain[i];
5345 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5348 /* Emit:
5349 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5350 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5351 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5352 gimple_call_set_lhs (new_stmt, data_ref);
5353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5355 else
5357 new_stmt = NULL;
5358 if (grouped_store)
5360 if (j == 0)
5361 result_chain.create (group_size);
5362 /* Permute. */
5363 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5364 &result_chain);
5367 next_stmt = first_stmt;
5368 for (i = 0; i < vec_num; i++)
5370 unsigned align, misalign;
5372 if (i > 0)
5373 /* Bump the vector pointer. */
5374 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5375 stmt, NULL_TREE);
5377 if (slp)
5378 vec_oprnd = vec_oprnds[i];
5379 else if (grouped_store)
5380 /* For grouped stores vectorized defs are interleaved in
5381 vect_permute_store_chain(). */
5382 vec_oprnd = result_chain[i];
5384 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5385 dataref_offset
5386 ? dataref_offset
5387 : build_int_cst (reference_alias_ptr_type
5388 (DR_REF (first_dr)), 0));
5389 align = TYPE_ALIGN_UNIT (vectype);
5390 if (aligned_access_p (first_dr))
5391 misalign = 0;
5392 else if (DR_MISALIGNMENT (first_dr) == -1)
5394 TREE_TYPE (data_ref)
5395 = build_aligned_type (TREE_TYPE (data_ref),
5396 TYPE_ALIGN (elem_type));
5397 align = TYPE_ALIGN_UNIT (elem_type);
5398 misalign = 0;
5400 else
5402 TREE_TYPE (data_ref)
5403 = build_aligned_type (TREE_TYPE (data_ref),
5404 TYPE_ALIGN (elem_type));
5405 misalign = DR_MISALIGNMENT (first_dr);
5407 if (dataref_offset == NULL_TREE)
5408 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5409 misalign);
5411 if (negative
5412 && dt != vect_constant_def
5413 && dt != vect_external_def)
5415 tree perm_mask = perm_mask_for_reverse (vectype);
5416 tree perm_dest
5417 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5418 vectype);
5419 tree new_temp = make_ssa_name (perm_dest, NULL);
5421 /* Generate the permute statement. */
5422 gimple perm_stmt
5423 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5424 vec_oprnd, vec_oprnd,
5425 perm_mask);
5426 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5428 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5429 vec_oprnd = new_temp;
5432 /* Arguments are ready. Create the new vector stmt. */
5433 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5434 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5436 if (slp)
5437 continue;
5439 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5440 if (!next_stmt)
5441 break;
5444 if (!slp)
5446 if (j == 0)
5447 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5448 else
5449 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5450 prev_stmt_info = vinfo_for_stmt (new_stmt);
5454 dr_chain.release ();
5455 oprnds.release ();
5456 result_chain.release ();
5457 vec_oprnds.release ();
5459 return true;
5462 /* Given a vector type VECTYPE and a permutation SEL, returns
5463 the VECTOR_CST mask that implements the permutation of the
5464 vector elements. If that is impossible to do, returns NULL. */
5466 tree
5467 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5469 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5470 int i, nunits;
5472 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5474 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5475 return NULL;
5477 mask_elt_type = lang_hooks.types.type_for_mode
5478 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5479 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5481 mask_elts = XALLOCAVEC (tree, nunits);
5482 for (i = nunits - 1; i >= 0; i--)
5483 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5484 mask_vec = build_vector (mask_type, mask_elts);
5486 return mask_vec;
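 /* Example: for a 4-element VECTYPE and SEL = {0, 4, 1, 5} the result is the
    VECTOR_CST {0, 4, 1, 5} of the corresponding integer vector type, i.e.
    the mask that makes a VEC_PERM_EXPR interleave the low halves of its two
    input vectors.  */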
5489 /* Given vector variables X and Y that were generated for the scalar
5490 STMT, generate instructions to permute the vector elements of X and Y
5491 using permutation mask MASK_VEC, insert them at *GSI and return the
5492 permuted vector variable. */
5494 static tree
5495 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5496 gimple_stmt_iterator *gsi)
5498 tree vectype = TREE_TYPE (x);
5499 tree perm_dest, data_ref;
5500 gimple perm_stmt;
5502 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5503 data_ref = make_ssa_name (perm_dest, NULL);
5505 /* Generate the permute statement. */
5506 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5507 x, y, mask_vec);
5508 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5510 return data_ref;
5513 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5514 inserting them on the loop's preheader edge. Returns true if we
5515 were successful in doing so (and thus STMT can be moved then),
5516 otherwise returns false. */
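 /* Roughly: for an invariant load x = *p_2 whose address computation p_2
    is defined inside LOOP but depends only on loop-invariant values, the
    defining statements are moved onto the preheader edge so the caller can
    hoist the load itself.  Only one level is handled; a PHI definition or a
    deeper in-loop dependency makes the function give up.  */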
5518 static bool
5519 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5521 ssa_op_iter i;
5522 tree op;
5523 bool any = false;
5525 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5527 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5528 if (!gimple_nop_p (def_stmt)
5529 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5531 /* Make sure we don't need to recurse. While we could do
5532 so in simple cases, when there are more complex use webs
5533 we don't have an easy way to preserve stmt order to fulfil
5534 dependencies within them. */
5535 tree op2;
5536 ssa_op_iter i2;
5537 if (gimple_code (def_stmt) == GIMPLE_PHI)
5538 return false;
5539 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5541 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5542 if (!gimple_nop_p (def_stmt2)
5543 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5544 return false;
5546 any = true;
5550 if (!any)
5551 return true;
5553 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5555 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5556 if (!gimple_nop_p (def_stmt)
5557 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5559 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5560 gsi_remove (&gsi, false);
5561 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5565 return true;
5568 /* vectorizable_load.
5570 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5571 can be vectorized.
5572 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5573 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5574 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5576 static bool
5577 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5578 slp_tree slp_node, slp_instance slp_node_instance)
5580 tree scalar_dest;
5581 tree vec_dest = NULL;
5582 tree data_ref = NULL;
5583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5584 stmt_vec_info prev_stmt_info;
5585 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5586 struct loop *loop = NULL;
5587 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5588 bool nested_in_vect_loop = false;
5589 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5590 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5591 tree elem_type;
5592 tree new_temp;
5593 enum machine_mode mode;
5594 gimple new_stmt = NULL;
5595 tree dummy;
5596 enum dr_alignment_support alignment_support_scheme;
5597 tree dataref_ptr = NULL_TREE;
5598 tree dataref_offset = NULL_TREE;
5599 gimple ptr_incr = NULL;
5600 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5601 int ncopies;
5602 int i, j, group_size, group_gap;
5603 tree msq = NULL_TREE, lsq;
5604 tree offset = NULL_TREE;
5605 tree byte_offset = NULL_TREE;
5606 tree realignment_token = NULL_TREE;
5607 gimple phi = NULL;
5608 vec<tree> dr_chain = vNULL;
5609 bool grouped_load = false;
5610 bool load_lanes_p = false;
5611 gimple first_stmt;
5612 bool inv_p;
5613 bool negative = false;
5614 bool compute_in_loop = false;
5615 struct loop *at_loop;
5616 int vec_num;
5617 bool slp = (slp_node != NULL);
5618 bool slp_perm = false;
5619 enum tree_code code;
5620 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5621 int vf;
5622 tree aggr_type;
5623 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5624 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5625 int gather_scale = 1;
5626 enum vect_def_type gather_dt = vect_unknown_def_type;
5628 if (loop_vinfo)
5630 loop = LOOP_VINFO_LOOP (loop_vinfo);
5631 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5632 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5634 else
5635 vf = 1;
5637 /* Multiple types in SLP are handled by creating the appropriate number of
5638 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5639 case of SLP. */
5640 if (slp || PURE_SLP_STMT (stmt_info))
5641 ncopies = 1;
5642 else
5643 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5645 gcc_assert (ncopies >= 1);
5647 /* FORNOW. This restriction should be relaxed. */
5648 if (nested_in_vect_loop && ncopies > 1)
5650 if (dump_enabled_p ())
5651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5652 "multiple types in nested loop.\n");
5653 return false;
5656 /* Invalidate assumptions made by dependence analysis when vectorization
5657 on the unrolled body effectively re-orders stmts. */
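 /* Illustration: if dependence analysis recorded a minimal negative
    distance of, say, 2 in STMT_VINFO_MIN_NEG_DIST, unrolling with a
    vectorization factor greater than 2 would let the copies of this load
    reuse (implicitly CSE) a value across a conflicting store, so that case
    is rejected below.  */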
5658 if (ncopies > 1
5659 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5660 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5661 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5663 if (dump_enabled_p ())
5664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5665 "cannot perform implicit CSE when unrolling "
5666 "with negative dependence distance\n");
5667 return false;
5670 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5671 return false;
5673 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5674 return false;
5676 /* Is vectorizable load? */
5677 if (!is_gimple_assign (stmt))
5678 return false;
5680 scalar_dest = gimple_assign_lhs (stmt);
5681 if (TREE_CODE (scalar_dest) != SSA_NAME)
5682 return false;
5684 code = gimple_assign_rhs_code (stmt);
5685 if (code != ARRAY_REF
5686 && code != BIT_FIELD_REF
5687 && code != INDIRECT_REF
5688 && code != COMPONENT_REF
5689 && code != IMAGPART_EXPR
5690 && code != REALPART_EXPR
5691 && code != MEM_REF
5692 && TREE_CODE_CLASS (code) != tcc_declaration)
5693 return false;
5695 if (!STMT_VINFO_DATA_REF (stmt_info))
5696 return false;
5698 elem_type = TREE_TYPE (vectype);
5699 mode = TYPE_MODE (vectype);
5701 /* FORNOW. In some cases we can vectorize even if the data-type is not supported
5702 (e.g. - data copies). */
5703 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5707 "Aligned load, but unsupported type.\n");
5708 return false;
5711 /* Check if the load is a part of an interleaving chain. */
5712 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5714 grouped_load = true;
5715 /* FORNOW */
5716 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5718 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5719 if (!slp && !PURE_SLP_STMT (stmt_info))
5721 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5722 if (vect_load_lanes_supported (vectype, group_size))
5723 load_lanes_p = true;
5724 else if (!vect_grouped_load_supported (vectype, group_size))
5725 return false;
5728 /* Invalidate assumptions made by dependence analysis when vectorization
5729 on the unrolled body effectively re-orders stmts. */
5730 if (!PURE_SLP_STMT (stmt_info)
5731 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5732 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5733 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5737 "cannot perform implicit CSE when performing "
5738 "group loads with negative dependence distance\n");
5739 return false;
5744 if (STMT_VINFO_GATHER_P (stmt_info))
5746 gimple def_stmt;
5747 tree def;
5748 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5749 &gather_off, &gather_scale);
5750 gcc_assert (gather_decl);
5751 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5752 &def_stmt, &def, &gather_dt,
5753 &gather_off_vectype))
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5757 "gather index use not simple.\n");
5758 return false;
5761 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5763 else
5765 negative = tree_int_cst_compare (nested_in_vect_loop
5766 ? STMT_VINFO_DR_STEP (stmt_info)
5767 : DR_STEP (dr),
5768 size_zero_node) < 0;
5769 if (negative && ncopies > 1)
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "multiple types with negative step.\n");
5774 return false;
5777 if (negative)
5779 if (grouped_load)
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5783 "negative step for group load not supported"
5784 "\n");
5785 return false;
5787 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5788 if (alignment_support_scheme != dr_aligned
5789 && alignment_support_scheme != dr_unaligned_supported)
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 "negative step but alignment required.\n");
5794 return false;
5796 if (!perm_mask_for_reverse (vectype))
5798 if (dump_enabled_p ())
5799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5800 "negative step and reversing not supported."
5801 "\n");
5802 return false;
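 /* Illustrative sketch (assumed example, not taken from the surrounding
 code) of how a negative-step load is handled: for a scalar loop

 for (i = n - 1; i >= 0; i--)
 ... = a[i];

 the vector load is emitted with OFFSET = -nunits + 1, so it covers the
 nunits elements ending at the current address, and the loaded vector is
 then reversed with a VEC_PERM_EXPR whose selector is
 { nunits-1, ..., 1, 0 } (see the uses of perm_mask_for_reverse and
 permute_vec_elements further below). */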
5807 if (!vec_stmt) /* transformation not required. */
5809 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5810 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5811 return true;
5814 if (dump_enabled_p ())
5815 dump_printf_loc (MSG_NOTE, vect_location,
5816 "transform load. ncopies = %d\n", ncopies);
5818 /** Transform. **/
5820 ensure_base_align (stmt_info, dr);
5822 if (STMT_VINFO_GATHER_P (stmt_info))
5824 tree vec_oprnd0 = NULL_TREE, op;
5825 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5826 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5827 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5828 edge pe = loop_preheader_edge (loop);
5829 gimple_seq seq;
5830 basic_block new_bb;
5831 enum { NARROW, NONE, WIDEN } modifier;
5832 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5834 if (nunits == gather_off_nunits)
5835 modifier = NONE;
5836 else if (nunits == gather_off_nunits / 2)
5838 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5839 modifier = WIDEN;
5841 for (i = 0; i < gather_off_nunits; ++i)
5842 sel[i] = i | nunits;
5844 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5845 gcc_assert (perm_mask != NULL_TREE);
5847 else if (nunits == gather_off_nunits * 2)
5849 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5850 modifier = NARROW;
5852 for (i = 0; i < nunits; ++i)
5853 sel[i] = i < gather_off_nunits
5854 ? i : i + nunits - gather_off_nunits;
5856 perm_mask = vect_gen_perm_mask (vectype, sel);
5857 gcc_assert (perm_mask != NULL_TREE);
5858 ncopies *= 2;
5860 else
5861 gcc_unreachable ();
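 /* Illustrative sketch (assumed element counts, not from the surrounding
 code): gathering a V4DF result with a V8SI index vector gives
 nunits == 4 and gather_off_nunits == 8, hence modifier == WIDEN and
 sel == { 4, 5, 6, 7, 4, 5, 6, 7 }; the odd copies below permute the
 upper half of the index vector into the low positions before the gather
 call. Conversely, gathering a V8SI result with a V4DI index vector
 gives modifier == NARROW, ncopies is doubled, and
 sel == { 0, 1, 2, 3, 8, 9, 10, 11 } merges the low halves of two
 consecutive gather results into one full result vector. */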
5863 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5864 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5865 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5866 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5867 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5868 scaletype = TREE_VALUE (arglist);
5869 gcc_checking_assert (types_compatible_p (srctype, rettype));
5871 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5873 ptr = fold_convert (ptrtype, gather_base);
5874 if (!is_gimple_min_invariant (ptr))
5876 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5877 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5878 gcc_assert (!new_bb);
5881 /* Currently we support only unconditional gather loads,
5882 so mask should be all ones. */
5883 if (TREE_CODE (masktype) == INTEGER_TYPE)
5884 mask = build_int_cst (masktype, -1);
5885 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5887 mask = build_int_cst (TREE_TYPE (masktype), -1);
5888 mask = build_vector_from_val (masktype, mask);
5889 mask = vect_init_vector (stmt, mask, masktype, NULL);
5891 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5893 REAL_VALUE_TYPE r;
5894 long tmp[6];
5895 for (j = 0; j < 6; ++j)
5896 tmp[j] = -1;
5897 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5898 mask = build_real (TREE_TYPE (masktype), r);
5899 mask = build_vector_from_val (masktype, mask);
5900 mask = vect_init_vector (stmt, mask, masktype, NULL);
5902 else
5903 gcc_unreachable ();
5905 scale = build_int_cst (scaletype, gather_scale);
5907 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5908 merge = build_int_cst (TREE_TYPE (rettype), 0);
5909 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5911 REAL_VALUE_TYPE r;
5912 long tmp[6];
5913 for (j = 0; j < 6; ++j)
5914 tmp[j] = 0;
5915 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5916 merge = build_real (TREE_TYPE (rettype), r);
5918 else
5919 gcc_unreachable ();
5920 merge = build_vector_from_val (rettype, merge);
5921 merge = vect_init_vector (stmt, merge, rettype, NULL);
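 /* To summarize the operands built above (an illustrative sketch based
 only on the code in this function): each gather call emitted in the
 loop below has the form

 _v = GATHER_DECL (merge, ptr, idx, mask, scale);

 where MASK is an all-ones vector, because only unconditional gathers
 are supported here and so every lane is active, and MERGE is a zero
 vector supplying the value of masked-off lanes. */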
5923 prev_stmt_info = NULL;
5924 for (j = 0; j < ncopies; ++j)
5926 if (modifier == WIDEN && (j & 1))
5927 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5928 perm_mask, stmt, gsi);
5929 else if (j == 0)
5930 op = vec_oprnd0
5931 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5932 else
5933 op = vec_oprnd0
5934 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5936 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5938 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5939 == TYPE_VECTOR_SUBPARTS (idxtype));
5940 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5941 var = make_ssa_name (var, NULL);
5942 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5943 new_stmt
5944 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5945 op, NULL_TREE);
5946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5947 op = var;
5950 new_stmt
5951 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5953 if (!useless_type_conversion_p (vectype, rettype))
5955 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5956 == TYPE_VECTOR_SUBPARTS (rettype));
5957 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5958 op = make_ssa_name (var, new_stmt);
5959 gimple_call_set_lhs (new_stmt, op);
5960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5961 var = make_ssa_name (vec_dest, NULL);
5962 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5963 new_stmt
5964 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5965 NULL_TREE);
5967 else
5969 var = make_ssa_name (vec_dest, new_stmt);
5970 gimple_call_set_lhs (new_stmt, var);
5973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5975 if (modifier == NARROW)
5977 if ((j & 1) == 0)
5979 prev_res = var;
5980 continue;
5982 var = permute_vec_elements (prev_res, var,
5983 perm_mask, stmt, gsi);
5984 new_stmt = SSA_NAME_DEF_STMT (var);
5987 if (prev_stmt_info == NULL)
5988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5989 else
5990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5991 prev_stmt_info = vinfo_for_stmt (new_stmt);
5993 return true;
5995 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5997 gimple_stmt_iterator incr_gsi;
5998 bool insert_after;
5999 gimple incr;
6000 tree offvar;
6001 tree ivstep;
6002 tree running_off;
6003 vec<constructor_elt, va_gc> *v = NULL;
6004 gimple_seq stmts = NULL;
6005 tree stride_base, stride_step, alias_off;
6007 gcc_assert (!nested_in_vect_loop);
6009 stride_base
6010 = fold_build_pointer_plus
6011 (unshare_expr (DR_BASE_ADDRESS (dr)),
6012 size_binop (PLUS_EXPR,
6013 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6014 convert_to_ptrofftype (DR_INIT (dr))));
6015 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6017 /* For a load with loop-invariant (but other than power-of-2)
6018 stride (i.e. not a grouped access) like so:
6020 for (i = 0; i < n; i += stride)
6021 ... = array[i];
6023 we generate a new induction variable and new accesses to
6024 form a new vector (or vectors, depending on ncopies):
6026 for (j = 0; ; j += VF*stride)
6027 tmp1 = array[j];
6028 tmp2 = array[j + stride];
6030 vectemp = {tmp1, tmp2, ...}
6033 ivstep = stride_step;
6034 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6035 build_int_cst (TREE_TYPE (ivstep), vf));
6037 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6039 create_iv (stride_base, ivstep, NULL,
6040 loop, &incr_gsi, insert_after,
6041 &offvar, NULL);
6042 incr = gsi_stmt (incr_gsi);
6043 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6045 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6046 if (stmts)
6047 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6049 prev_stmt_info = NULL;
6050 running_off = offvar;
6051 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6052 for (j = 0; j < ncopies; j++)
6054 tree vec_inv;
6056 vec_alloc (v, nunits);
6057 for (i = 0; i < nunits; i++)
6059 tree newref, newoff;
6060 gimple incr;
6061 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6062 running_off, alias_off);
6064 newref = force_gimple_operand_gsi (gsi, newref, true,
6065 NULL_TREE, true,
6066 GSI_SAME_STMT);
6067 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6068 newoff = copy_ssa_name (running_off, NULL);
6069 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6070 running_off, stride_step);
6071 vect_finish_stmt_generation (stmt, incr, gsi);
6073 running_off = newoff;
6076 vec_inv = build_constructor (vectype, v);
6077 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6078 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6080 if (j == 0)
6081 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6082 else
6083 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6084 prev_stmt_info = vinfo_for_stmt (new_stmt);
6086 return true;
6089 if (grouped_load)
6091 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6092 if (slp
6093 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6094 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6095 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6097 /* Check if the chain of loads is already vectorized. */
6098 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6099 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6100 ??? But we can only do so if there is exactly one
6101 as we have no way to get at the rest. Leave the CSE
6102 opportunity alone.
6103 ??? With the group load eventually participating
6104 in multiple different permutations (having multiple
6105 slp nodes which refer to the same group) the CSE
6106 would even produce wrong code. See PR56270. */
6107 && !slp)
6109 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6110 return true;
6112 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6113 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6115 /* VEC_NUM is the number of vect stmts to be created for this group. */
6116 if (slp)
6118 grouped_load = false;
6119 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6120 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6121 slp_perm = true;
6122 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6124 else
6126 vec_num = group_size;
6127 group_gap = 0;
6130 else
6132 first_stmt = stmt;
6133 first_dr = dr;
6134 group_size = vec_num = 1;
6135 group_gap = 0;
6138 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6139 gcc_assert (alignment_support_scheme);
6140 /* Targets with load-lane instructions must not require explicit
6141 realignment. */
6142 gcc_assert (!load_lanes_p
6143 || alignment_support_scheme == dr_aligned
6144 || alignment_support_scheme == dr_unaligned_supported);
6146 /* In case the vectorization factor (VF) is bigger than the number
6147 of elements that we can fit in a vectype (nunits), we have to generate
6148 more than one vector stmt - i.e - we need to "unroll" the
6149 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6150 from one copy of the vector stmt to the next, in the field
6151 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6152 stages to find the correct vector defs to be used when vectorizing
6153 stmts that use the defs of the current stmt. The example below
6154 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6155 need to create 4 vectorized stmts):
6157 before vectorization:
6158 RELATED_STMT VEC_STMT
6159 S1: x = memref - -
6160 S2: z = x + 1 - -
6162 step 1: vectorize stmt S1:
6163 We first create the vector stmt VS1_0, and, as usual, record a
6164 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6165 Next, we create the vector stmt VS1_1, and record a pointer to
6166 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6167 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6168 stmts and pointers:
6169 RELATED_STMT VEC_STMT
6170 VS1_0: vx0 = memref0 VS1_1 -
6171 VS1_1: vx1 = memref1 VS1_2 -
6172 VS1_2: vx2 = memref2 VS1_3 -
6173 VS1_3: vx3 = memref3 - -
6174 S1: x = load - VS1_0
6175 S2: z = x + 1 - -
6177 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6178 information we recorded in RELATED_STMT field is used to vectorize
6179 stmt S2. */
6181 /* In case of interleaving (non-unit grouped access):
6183 S1: x2 = &base + 2
6184 S2: x0 = &base
6185 S3: x1 = &base + 1
6186 S4: x3 = &base + 3
6188 Vectorized loads are created in the order of memory accesses
6189 starting from the access of the first stmt of the chain:
6191 VS1: vx0 = &base
6192 VS2: vx1 = &base + vec_size*1
6193 VS3: vx2 = &base + vec_size*2
6194 VS4: vx3 = &base + vec_size*3
6196 Then permutation statements are generated:
6198 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6199 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6202 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6203 (the order of the data-refs in the output of vect_permute_load_chain
6204 corresponds to the order of scalar stmts in the interleaving chain - see
6205 the documentation of vect_permute_load_chain()).
6206 The generation of permutation stmts and recording them in
6207 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6209 In case of both multiple types and interleaving, the vector loads and
6210 permutation stmts above are created for every copy. The result vector
6211 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6212 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6214 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6215 on a target that supports unaligned accesses (dr_unaligned_supported)
6216 we generate the following code:
6217 p = initial_addr;
6218 indx = 0;
6219 loop {
6220 p = p + indx * vectype_size;
6221 vec_dest = *(p);
6222 indx = indx + 1;
6225 Otherwise, the data reference is potentially unaligned on a target that
6226 does not support unaligned accesses (dr_explicit_realign_optimized) -
6227 then generate the following code, in which the data in each iteration is
6228 obtained by two vector loads, one from the previous iteration, and one
6229 from the current iteration:
6230 p1 = initial_addr;
6231 msq_init = *(floor(p1))
6232 p2 = initial_addr + VS - 1;
6233 realignment_token = call target_builtin;
6234 indx = 0;
6235 loop {
6236 p2 = p2 + indx * vectype_size
6237 lsq = *(floor(p2))
6238 vec_dest = realign_load (msq, lsq, realignment_token)
6239 indx = indx + 1;
6240 msq = lsq;
6241 } */
6243 /* If the misalignment remains the same throughout the execution of the
6244 loop, we can create the init_addr and permutation mask at the loop
6245 preheader. Otherwise, it needs to be created inside the loop.
6246 This can only occur when vectorizing memory accesses in the inner-loop
6247 nested within an outer-loop that is being vectorized. */
6249 if (nested_in_vect_loop
6250 && (TREE_INT_CST_LOW (DR_STEP (dr))
6251 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6253 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6254 compute_in_loop = true;
6257 if ((alignment_support_scheme == dr_explicit_realign_optimized
6258 || alignment_support_scheme == dr_explicit_realign)
6259 && !compute_in_loop)
6261 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6262 alignment_support_scheme, NULL_TREE,
6263 &at_loop);
6264 if (alignment_support_scheme == dr_explicit_realign_optimized)
6266 phi = SSA_NAME_DEF_STMT (msq);
6267 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6268 size_one_node);
6271 else
6272 at_loop = loop;
6274 if (negative)
6275 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6277 if (load_lanes_p)
6278 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6279 else
6280 aggr_type = vectype;
6282 prev_stmt_info = NULL;
6283 for (j = 0; j < ncopies; j++)
6285 /* 1. Create the vector or array pointer update chain. */
6286 if (j == 0)
6288 bool simd_lane_access_p
6289 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6290 if (simd_lane_access_p
6291 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6292 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6293 && integer_zerop (DR_OFFSET (first_dr))
6294 && integer_zerop (DR_INIT (first_dr))
6295 && alias_sets_conflict_p (get_alias_set (aggr_type),
6296 get_alias_set (DR_REF (first_dr)))
6297 && (alignment_support_scheme == dr_aligned
6298 || alignment_support_scheme == dr_unaligned_supported))
6300 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6301 dataref_offset = build_int_cst (reference_alias_ptr_type
6302 (DR_REF (first_dr)), 0);
6303 inv_p = false;
6305 else
6306 dataref_ptr
6307 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6308 offset, &dummy, gsi, &ptr_incr,
6309 simd_lane_access_p, &inv_p,
6310 byte_offset);
6312 else if (dataref_offset)
6313 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6314 TYPE_SIZE_UNIT (aggr_type));
6315 else
6316 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6317 TYPE_SIZE_UNIT (aggr_type));
6319 if (grouped_load || slp_perm)
6320 dr_chain.create (vec_num);
6322 if (load_lanes_p)
6324 tree vec_array;
6326 vec_array = create_vector_array (vectype, vec_num);
6328 /* Emit:
6329 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6330 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6331 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6332 gimple_call_set_lhs (new_stmt, vec_array);
6333 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6335 /* Extract each vector into an SSA_NAME. */
6336 for (i = 0; i < vec_num; i++)
6338 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6339 vec_array, i);
6340 dr_chain.quick_push (new_temp);
6343 /* Record the mapping between SSA_NAMEs and statements. */
6344 vect_record_grouped_load_vectors (stmt, dr_chain);
6346 else
6348 for (i = 0; i < vec_num; i++)
6350 if (i > 0)
6351 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6352 stmt, NULL_TREE);
6354 /* 2. Create the vector-load in the loop. */
6355 switch (alignment_support_scheme)
6357 case dr_aligned:
6358 case dr_unaligned_supported:
6360 unsigned int align, misalign;
6362 data_ref
6363 = build2 (MEM_REF, vectype, dataref_ptr,
6364 dataref_offset
6365 ? dataref_offset
6366 : build_int_cst (reference_alias_ptr_type
6367 (DR_REF (first_dr)), 0));
6368 align = TYPE_ALIGN_UNIT (vectype);
6369 if (alignment_support_scheme == dr_aligned)
6371 gcc_assert (aligned_access_p (first_dr));
6372 misalign = 0;
6374 else if (DR_MISALIGNMENT (first_dr) == -1)
6376 TREE_TYPE (data_ref)
6377 = build_aligned_type (TREE_TYPE (data_ref),
6378 TYPE_ALIGN (elem_type));
6379 align = TYPE_ALIGN_UNIT (elem_type);
6380 misalign = 0;
6382 else
6384 TREE_TYPE (data_ref)
6385 = build_aligned_type (TREE_TYPE (data_ref),
6386 TYPE_ALIGN (elem_type));
6387 misalign = DR_MISALIGNMENT (first_dr);
6389 if (dataref_offset == NULL_TREE)
6390 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6391 align, misalign);
6392 break;
6394 case dr_explicit_realign:
6396 tree ptr, bump;
6397 tree vs_minus_1;
6399 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6401 if (compute_in_loop)
6402 msq = vect_setup_realignment (first_stmt, gsi,
6403 &realignment_token,
6404 dr_explicit_realign,
6405 dataref_ptr, NULL);
6407 ptr = copy_ssa_name (dataref_ptr, NULL);
6408 new_stmt = gimple_build_assign_with_ops
6409 (BIT_AND_EXPR, ptr, dataref_ptr,
6410 build_int_cst
6411 (TREE_TYPE (dataref_ptr),
6412 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6413 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6414 data_ref
6415 = build2 (MEM_REF, vectype, ptr,
6416 build_int_cst (reference_alias_ptr_type
6417 (DR_REF (first_dr)), 0));
6418 vec_dest = vect_create_destination_var (scalar_dest,
6419 vectype);
6420 new_stmt = gimple_build_assign (vec_dest, data_ref);
6421 new_temp = make_ssa_name (vec_dest, new_stmt);
6422 gimple_assign_set_lhs (new_stmt, new_temp);
6423 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6424 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6426 msq = new_temp;
6428 bump = size_binop (MULT_EXPR, vs_minus_1,
6429 TYPE_SIZE_UNIT (elem_type));
6430 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6431 new_stmt = gimple_build_assign_with_ops
6432 (BIT_AND_EXPR, NULL_TREE, ptr,
6433 build_int_cst
6434 (TREE_TYPE (ptr),
6435 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6436 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6437 gimple_assign_set_lhs (new_stmt, ptr);
6438 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6439 data_ref
6440 = build2 (MEM_REF, vectype, ptr,
6441 build_int_cst (reference_alias_ptr_type
6442 (DR_REF (first_dr)), 0));
6443 break;
6445 case dr_explicit_realign_optimized:
6446 new_temp = copy_ssa_name (dataref_ptr, NULL);
6447 new_stmt = gimple_build_assign_with_ops
6448 (BIT_AND_EXPR, new_temp, dataref_ptr,
6449 build_int_cst
6450 (TREE_TYPE (dataref_ptr),
6451 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6452 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6453 data_ref
6454 = build2 (MEM_REF, vectype, new_temp,
6455 build_int_cst (reference_alias_ptr_type
6456 (DR_REF (first_dr)), 0));
6457 break;
6458 default:
6459 gcc_unreachable ();
6461 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6462 new_stmt = gimple_build_assign (vec_dest, data_ref);
6463 new_temp = make_ssa_name (vec_dest, new_stmt);
6464 gimple_assign_set_lhs (new_stmt, new_temp);
6465 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6467 /* 3. Handle explicit realignment if necessary/supported.
6468 Create in loop:
6469 vec_dest = realign_load (msq, lsq, realignment_token) */
6470 if (alignment_support_scheme == dr_explicit_realign_optimized
6471 || alignment_support_scheme == dr_explicit_realign)
6473 lsq = gimple_assign_lhs (new_stmt);
6474 if (!realignment_token)
6475 realignment_token = dataref_ptr;
6476 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6477 new_stmt
6478 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6479 vec_dest, msq, lsq,
6480 realignment_token);
6481 new_temp = make_ssa_name (vec_dest, new_stmt);
6482 gimple_assign_set_lhs (new_stmt, new_temp);
6483 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6485 if (alignment_support_scheme == dr_explicit_realign_optimized)
6487 gcc_assert (phi);
6488 if (i == vec_num - 1 && j == ncopies - 1)
6489 add_phi_arg (phi, lsq,
6490 loop_latch_edge (containing_loop),
6491 UNKNOWN_LOCATION);
6492 msq = lsq;
6496 /* 4. Handle invariant-load. */
6497 if (inv_p && !bb_vinfo)
6499 gcc_assert (!grouped_load);
6500 /* If we have versioned for aliasing or the loop doesn't
6501 have any data dependencies that would preclude this,
6502 then we are sure this is a loop invariant load and
6503 thus we can insert it on the preheader edge. */
6504 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6505 && !nested_in_vect_loop
6506 && hoist_defs_of_uses (stmt, loop))
6508 if (dump_enabled_p ())
6510 dump_printf_loc (MSG_NOTE, vect_location,
6511 "hoisting out of the vectorized "
6512 "loop: ");
6513 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6514 dump_printf (MSG_NOTE, "\n");
6516 tree tem = copy_ssa_name (scalar_dest, NULL);
6517 gsi_insert_on_edge_immediate
6518 (loop_preheader_edge (loop),
6519 gimple_build_assign (tem,
6520 unshare_expr
6521 (gimple_assign_rhs1 (stmt))));
6522 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6524 else
6526 gimple_stmt_iterator gsi2 = *gsi;
6527 gsi_next (&gsi2);
6528 new_temp = vect_init_vector (stmt, scalar_dest,
6529 vectype, &gsi2);
6531 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6532 set_vinfo_for_stmt (new_stmt,
6533 new_stmt_vec_info (new_stmt, loop_vinfo,
6534 bb_vinfo));
6537 if (negative)
6539 tree perm_mask = perm_mask_for_reverse (vectype);
6540 new_temp = permute_vec_elements (new_temp, new_temp,
6541 perm_mask, stmt, gsi);
6542 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6545 /* Collect vector loads and later create their permutation in
6546 vect_transform_grouped_load (). */
6547 if (grouped_load || slp_perm)
6548 dr_chain.quick_push (new_temp);
6550 /* Store vector loads in the corresponding SLP_NODE. */
6551 if (slp && !slp_perm)
6552 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6554 /* Bump the vector pointer to account for a gap. */
6555 if (slp && group_gap != 0)
6557 tree bump = size_binop (MULT_EXPR,
6558 TYPE_SIZE_UNIT (elem_type),
6559 size_int (group_gap));
6560 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6561 stmt, bump);
6565 if (slp && !slp_perm)
6566 continue;
6568 if (slp_perm)
6570 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6571 slp_node_instance, false))
6573 dr_chain.release ();
6574 return false;
6577 else
6579 if (grouped_load)
6581 if (!load_lanes_p)
6582 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6583 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6585 else
6587 if (j == 0)
6588 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6589 else
6590 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6591 prev_stmt_info = vinfo_for_stmt (new_stmt);
6594 dr_chain.release ();
6597 return true;
6600 /* Function vect_is_simple_cond.
6602 Input:
6603 LOOP - the loop that is being vectorized.
6604 COND - Condition that is checked for simple use.
6606 Output:
6607 *COMP_VECTYPE - the vector type for the comparison.
6609 Returns whether a COND can be vectorized. Checks whether
6610 condition operands are supportable using vect_is_simple_use. */
6612 static bool
6613 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6614 bb_vec_info bb_vinfo, tree *comp_vectype)
6616 tree lhs, rhs;
6617 tree def;
6618 enum vect_def_type dt;
6619 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6621 if (!COMPARISON_CLASS_P (cond))
6622 return false;
6624 lhs = TREE_OPERAND (cond, 0);
6625 rhs = TREE_OPERAND (cond, 1);
6627 if (TREE_CODE (lhs) == SSA_NAME)
6629 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6630 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6631 &lhs_def_stmt, &def, &dt, &vectype1))
6632 return false;
6634 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6635 && TREE_CODE (lhs) != FIXED_CST)
6636 return false;
6638 if (TREE_CODE (rhs) == SSA_NAME)
6640 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6641 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6642 &rhs_def_stmt, &def, &dt, &vectype2))
6643 return false;
6645 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6646 && TREE_CODE (rhs) != FIXED_CST)
6647 return false;
6649 *comp_vectype = vectype1 ? vectype1 : vectype2;
6650 return true;
6653 /* vectorizable_condition.
6655 Check if STMT is conditional modify expression that can be vectorized.
6656 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6657 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6658 at GSI.
6660 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6661 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6662 else clause if it is 2).
6664 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6666 bool
6667 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6668 gimple *vec_stmt, tree reduc_def, int reduc_index,
6669 slp_tree slp_node)
6671 tree scalar_dest = NULL_TREE;
6672 tree vec_dest = NULL_TREE;
6673 tree cond_expr, then_clause, else_clause;
6674 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6675 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6676 tree comp_vectype = NULL_TREE;
6677 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6678 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6679 tree vec_compare, vec_cond_expr;
6680 tree new_temp;
6681 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6682 tree def;
6683 enum vect_def_type dt, dts[4];
6684 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6685 int ncopies;
6686 enum tree_code code;
6687 stmt_vec_info prev_stmt_info = NULL;
6688 int i, j;
6689 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6690 vec<tree> vec_oprnds0 = vNULL;
6691 vec<tree> vec_oprnds1 = vNULL;
6692 vec<tree> vec_oprnds2 = vNULL;
6693 vec<tree> vec_oprnds3 = vNULL;
6694 tree vec_cmp_type;
6696 if (slp_node || PURE_SLP_STMT (stmt_info))
6697 ncopies = 1;
6698 else
6699 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6701 gcc_assert (ncopies >= 1);
6702 if (reduc_index && ncopies > 1)
6703 return false; /* FORNOW */
6705 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6706 return false;
6708 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6709 return false;
6711 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6712 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6713 && reduc_def))
6714 return false;
6716 /* FORNOW: not yet supported. */
6717 if (STMT_VINFO_LIVE_P (stmt_info))
6719 if (dump_enabled_p ())
6720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6721 "value used after loop.\n");
6722 return false;
6725 /* Is vectorizable conditional operation? */
6726 if (!is_gimple_assign (stmt))
6727 return false;
6729 code = gimple_assign_rhs_code (stmt);
6731 if (code != COND_EXPR)
6732 return false;
6734 cond_expr = gimple_assign_rhs1 (stmt);
6735 then_clause = gimple_assign_rhs2 (stmt);
6736 else_clause = gimple_assign_rhs3 (stmt);
6738 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6739 &comp_vectype)
6740 || !comp_vectype)
6741 return false;
6743 if (TREE_CODE (then_clause) == SSA_NAME)
6745 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6746 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6747 &then_def_stmt, &def, &dt))
6748 return false;
6750 else if (TREE_CODE (then_clause) != INTEGER_CST
6751 && TREE_CODE (then_clause) != REAL_CST
6752 && TREE_CODE (then_clause) != FIXED_CST)
6753 return false;
6755 if (TREE_CODE (else_clause) == SSA_NAME)
6757 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6758 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6759 &else_def_stmt, &def, &dt))
6760 return false;
6762 else if (TREE_CODE (else_clause) != INTEGER_CST
6763 && TREE_CODE (else_clause) != REAL_CST
6764 && TREE_CODE (else_clause) != FIXED_CST)
6765 return false;
6767 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6768 /* The result of a vector comparison should be a signed type. */
6769 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6770 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6771 if (vec_cmp_type == NULL_TREE)
6772 return false;
6774 if (!vec_stmt)
6776 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6777 return expand_vec_cond_expr_p (vectype, comp_vectype);
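 /* Illustrative sketch (assumed SSA names, not from the surrounding
 code) of the transformation performed below: a scalar statement

 x_1 = a_2 < b_3 ? c_4 : d_5;

 becomes, per copy,

 vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;

 where the comparison is built in VEC_CMP_TYPE and the selection in
 VECTYPE. */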
6780 /* Transform. */
6782 if (!slp_node)
6784 vec_oprnds0.create (1);
6785 vec_oprnds1.create (1);
6786 vec_oprnds2.create (1);
6787 vec_oprnds3.create (1);
6790 /* Handle def. */
6791 scalar_dest = gimple_assign_lhs (stmt);
6792 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6794 /* Handle cond expr. */
6795 for (j = 0; j < ncopies; j++)
6797 gimple new_stmt = NULL;
6798 if (j == 0)
6800 if (slp_node)
6802 auto_vec<tree, 4> ops;
6803 auto_vec<vec<tree>, 4> vec_defs;
6805 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6806 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6807 ops.safe_push (then_clause);
6808 ops.safe_push (else_clause);
6809 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6810 vec_oprnds3 = vec_defs.pop ();
6811 vec_oprnds2 = vec_defs.pop ();
6812 vec_oprnds1 = vec_defs.pop ();
6813 vec_oprnds0 = vec_defs.pop ();
6815 ops.release ();
6816 vec_defs.release ();
6818 else
6820 gimple gtemp;
6821 vec_cond_lhs =
6822 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6823 stmt, NULL);
6824 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6825 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6827 vec_cond_rhs =
6828 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6829 stmt, NULL);
6830 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6831 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6832 if (reduc_index == 1)
6833 vec_then_clause = reduc_def;
6834 else
6836 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6837 stmt, NULL);
6838 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6839 NULL, &gtemp, &def, &dts[2]);
6841 if (reduc_index == 2)
6842 vec_else_clause = reduc_def;
6843 else
6845 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6846 stmt, NULL);
6847 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6848 NULL, &gtemp, &def, &dts[3]);
6852 else
6854 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6855 vec_oprnds0.pop ());
6856 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6857 vec_oprnds1.pop ());
6858 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6859 vec_oprnds2.pop ());
6860 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6861 vec_oprnds3.pop ());
6864 if (!slp_node)
6866 vec_oprnds0.quick_push (vec_cond_lhs);
6867 vec_oprnds1.quick_push (vec_cond_rhs);
6868 vec_oprnds2.quick_push (vec_then_clause);
6869 vec_oprnds3.quick_push (vec_else_clause);
6872 /* Arguments are ready. Create the new vector stmt. */
6873 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6875 vec_cond_rhs = vec_oprnds1[i];
6876 vec_then_clause = vec_oprnds2[i];
6877 vec_else_clause = vec_oprnds3[i];
6879 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6880 vec_cond_lhs, vec_cond_rhs);
6881 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6882 vec_compare, vec_then_clause, vec_else_clause);
6884 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6885 new_temp = make_ssa_name (vec_dest, new_stmt);
6886 gimple_assign_set_lhs (new_stmt, new_temp);
6887 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6888 if (slp_node)
6889 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6892 if (slp_node)
6893 continue;
6895 if (j == 0)
6896 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6897 else
6898 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6900 prev_stmt_info = vinfo_for_stmt (new_stmt);
6903 vec_oprnds0.release ();
6904 vec_oprnds1.release ();
6905 vec_oprnds2.release ();
6906 vec_oprnds3.release ();
6908 return true;
6912 /* Make sure the statement is vectorizable. */
6914 bool
6915 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6917 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6918 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6919 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6920 bool ok;
6921 tree scalar_type, vectype;
6922 gimple pattern_stmt;
6923 gimple_seq pattern_def_seq;
6925 if (dump_enabled_p ())
6927 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6928 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6929 dump_printf (MSG_NOTE, "\n");
6932 if (gimple_has_volatile_ops (stmt))
6934 if (dump_enabled_p ())
6935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6936 "not vectorized: stmt has volatile operands\n");
6938 return false;
6941 /* Skip stmts that do not need to be vectorized. In loops this is expected
6942 to include:
6943 - the COND_EXPR which is the loop exit condition
6944 - any LABEL_EXPRs in the loop
6945 - computations that are used only for array indexing or loop control.
6946 In basic blocks we only analyze statements that are a part of some SLP
6947 instance, therefore, all the statements are relevant.
6949 Pattern statement needs to be analyzed instead of the original statement
6950 if the original statement is not relevant. Otherwise, we analyze both
6951 statements. In basic blocks we are called from some SLP instance
6952 traversal; don't analyze pattern stmts instead, as the pattern stmts
6953 will already be part of the SLP instance. */
6955 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6956 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6957 && !STMT_VINFO_LIVE_P (stmt_info))
6959 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6960 && pattern_stmt
6961 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6962 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6964 /* Analyze PATTERN_STMT instead of the original stmt. */
6965 stmt = pattern_stmt;
6966 stmt_info = vinfo_for_stmt (pattern_stmt);
6967 if (dump_enabled_p ())
6969 dump_printf_loc (MSG_NOTE, vect_location,
6970 "==> examining pattern statement: ");
6971 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6972 dump_printf (MSG_NOTE, "\n");
6975 else
6977 if (dump_enabled_p ())
6978 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6980 return true;
6983 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6984 && node == NULL
6985 && pattern_stmt
6986 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6987 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6989 /* Analyze PATTERN_STMT too. */
6990 if (dump_enabled_p ())
6992 dump_printf_loc (MSG_NOTE, vect_location,
6993 "==> examining pattern statement: ");
6994 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6995 dump_printf (MSG_NOTE, "\n");
6998 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
6999 return false;
7002 if (is_pattern_stmt_p (stmt_info)
7003 && node == NULL
7004 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7006 gimple_stmt_iterator si;
7008 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7010 gimple pattern_def_stmt = gsi_stmt (si);
7011 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7012 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7014 /* Analyze def stmt of STMT if it's a pattern stmt. */
7015 if (dump_enabled_p ())
7017 dump_printf_loc (MSG_NOTE, vect_location,
7018 "==> examining pattern def statement: ");
7019 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7020 dump_printf (MSG_NOTE, "\n");
7023 if (!vect_analyze_stmt (pattern_def_stmt,
7024 need_to_vectorize, node))
7025 return false;
7030 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7032 case vect_internal_def:
7033 break;
7035 case vect_reduction_def:
7036 case vect_nested_cycle:
7037 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7038 || relevance == vect_used_in_outer_by_reduction
7039 || relevance == vect_unused_in_scope));
7040 break;
7042 case vect_induction_def:
7043 case vect_constant_def:
7044 case vect_external_def:
7045 case vect_unknown_def_type:
7046 default:
7047 gcc_unreachable ();
7050 if (bb_vinfo)
7052 gcc_assert (PURE_SLP_STMT (stmt_info));
7054 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7055 if (dump_enabled_p ())
7057 dump_printf_loc (MSG_NOTE, vect_location,
7058 "get vectype for scalar type: ");
7059 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7060 dump_printf (MSG_NOTE, "\n");
7063 vectype = get_vectype_for_scalar_type (scalar_type);
7064 if (!vectype)
7066 if (dump_enabled_p ())
7068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7069 "not SLPed: unsupported data-type ");
7070 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7071 scalar_type);
7072 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7074 return false;
7077 if (dump_enabled_p ())
7079 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7080 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7081 dump_printf (MSG_NOTE, "\n");
7084 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7087 if (STMT_VINFO_RELEVANT_P (stmt_info))
7089 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7090 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7091 || (is_gimple_call (stmt)
7092 && gimple_call_lhs (stmt) == NULL_TREE));
7093 *need_to_vectorize = true;
7096 ok = true;
7097 if (!bb_vinfo
7098 && (STMT_VINFO_RELEVANT_P (stmt_info)
7099 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7100 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7101 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7102 || vectorizable_shift (stmt, NULL, NULL, NULL)
7103 || vectorizable_operation (stmt, NULL, NULL, NULL)
7104 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7105 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7106 || vectorizable_call (stmt, NULL, NULL, NULL)
7107 || vectorizable_store (stmt, NULL, NULL, NULL)
7108 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7109 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7110 else
7112 if (bb_vinfo)
7113 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7114 || vectorizable_conversion (stmt, NULL, NULL, node)
7115 || vectorizable_shift (stmt, NULL, NULL, node)
7116 || vectorizable_operation (stmt, NULL, NULL, node)
7117 || vectorizable_assignment (stmt, NULL, NULL, node)
7118 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7119 || vectorizable_call (stmt, NULL, NULL, node)
7120 || vectorizable_store (stmt, NULL, NULL, node)
7121 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7124 if (!ok)
7126 if (dump_enabled_p ())
7128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7129 "not vectorized: relevant stmt not ");
7130 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7131 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7132 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7135 return false;
7138 if (bb_vinfo)
7139 return true;
7141 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7142 need extra handling, except for vectorizable reductions. */
7143 if (STMT_VINFO_LIVE_P (stmt_info)
7144 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7145 ok = vectorizable_live_operation (stmt, NULL, NULL);
7147 if (!ok)
7149 if (dump_enabled_p ())
7151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7152 "not vectorized: live stmt not ");
7153 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7154 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7155 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7158 return false;
7161 return true;
7165 /* Function vect_transform_stmt.
7167 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7169 bool
7170 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7171 bool *grouped_store, slp_tree slp_node,
7172 slp_instance slp_node_instance)
7174 bool is_store = false;
7175 gimple vec_stmt = NULL;
7176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7177 bool done;
7179 switch (STMT_VINFO_TYPE (stmt_info))
7181 case type_demotion_vec_info_type:
7182 case type_promotion_vec_info_type:
7183 case type_conversion_vec_info_type:
7184 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7185 gcc_assert (done);
7186 break;
7188 case induc_vec_info_type:
7189 gcc_assert (!slp_node);
7190 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7191 gcc_assert (done);
7192 break;
7194 case shift_vec_info_type:
7195 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7196 gcc_assert (done);
7197 break;
7199 case op_vec_info_type:
7200 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7201 gcc_assert (done);
7202 break;
7204 case assignment_vec_info_type:
7205 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7206 gcc_assert (done);
7207 break;
7209 case load_vec_info_type:
7210 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7211 slp_node_instance);
7212 gcc_assert (done);
7213 break;
7215 case store_vec_info_type:
7216 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7217 gcc_assert (done);
7218 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7220 /* In case of interleaving, the whole chain is vectorized when the
7221 last store in the chain is reached. Store stmts before the last
7222 one are skipped, and their vec_stmt_info shouldn't be freed
7223 meanwhile. */
7224 *grouped_store = true;
7225 if (STMT_VINFO_VEC_STMT (stmt_info))
7226 is_store = true;
7228 else
7229 is_store = true;
7230 break;
7232 case condition_vec_info_type:
7233 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7234 gcc_assert (done);
7235 break;
7237 case call_vec_info_type:
7238 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7239 stmt = gsi_stmt (*gsi);
7240 if (is_gimple_call (stmt)
7241 && gimple_call_internal_p (stmt)
7242 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7243 is_store = true;
7244 break;
7246 case call_simd_clone_vec_info_type:
7247 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7248 stmt = gsi_stmt (*gsi);
7249 break;
7251 case reduc_vec_info_type:
7252 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7253 gcc_assert (done);
7254 break;
7256 default:
7257 if (!STMT_VINFO_LIVE_P (stmt_info))
7259 if (dump_enabled_p ())
7260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7261 "stmt not supported.\n");
7262 gcc_unreachable ();
7266 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7267 is being vectorized, but outside the immediately enclosing loop. */
7268 if (vec_stmt
7269 && STMT_VINFO_LOOP_VINFO (stmt_info)
7270 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7271 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7272 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7273 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7274 || STMT_VINFO_RELEVANT (stmt_info) ==
7275 vect_used_in_outer_by_reduction))
7277 struct loop *innerloop = LOOP_VINFO_LOOP (
7278 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7279 imm_use_iterator imm_iter;
7280 use_operand_p use_p;
7281 tree scalar_dest;
7282 gimple exit_phi;
7284 if (dump_enabled_p ())
7285 dump_printf_loc (MSG_NOTE, vect_location,
7286 "Record the vdef for outer-loop vectorization.\n");
7288 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7289 (to be used when vectorizing outer-loop stmts that use the DEF of
7290 STMT). */
7291 if (gimple_code (stmt) == GIMPLE_PHI)
7292 scalar_dest = PHI_RESULT (stmt);
7293 else
7294 scalar_dest = gimple_assign_lhs (stmt);
7296 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7298 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7300 exit_phi = USE_STMT (use_p);
7301 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7306 /* Handle stmts whose DEF is used outside the loop-nest that is
7307 being vectorized. */
7308 if (STMT_VINFO_LIVE_P (stmt_info)
7309 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7311 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7312 gcc_assert (done);
7315 if (vec_stmt)
7316 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7318 return is_store;
7322 /* Remove a group of stores (for SLP or interleaving), free their
7323 stmt_vec_info. */
7325 void
7326 vect_remove_stores (gimple first_stmt)
7328 gimple next = first_stmt;
7329 gimple tmp;
7330 gimple_stmt_iterator next_si;
7332 while (next)
7334 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7336 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7337 if (is_pattern_stmt_p (stmt_info))
7338 next = STMT_VINFO_RELATED_STMT (stmt_info);
7339 /* Free the attached stmt_vec_info and remove the stmt. */
7340 next_si = gsi_for_stmt (next);
7341 unlink_stmt_vdef (next);
7342 gsi_remove (&next_si, true);
7343 release_defs (next);
7344 free_stmt_vec_info (next);
7345 next = tmp;
7350 /* Function new_stmt_vec_info.
7352 Create and initialize a new stmt_vec_info struct for STMT. */
7354 stmt_vec_info
7355 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7356 bb_vec_info bb_vinfo)
7358 stmt_vec_info res;
7359 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7361 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7362 STMT_VINFO_STMT (res) = stmt;
7363 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7364 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7365 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7366 STMT_VINFO_LIVE_P (res) = false;
7367 STMT_VINFO_VECTYPE (res) = NULL;
7368 STMT_VINFO_VEC_STMT (res) = NULL;
7369 STMT_VINFO_VECTORIZABLE (res) = true;
7370 STMT_VINFO_IN_PATTERN_P (res) = false;
7371 STMT_VINFO_RELATED_STMT (res) = NULL;
7372 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7373 STMT_VINFO_DATA_REF (res) = NULL;
7375 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7376 STMT_VINFO_DR_OFFSET (res) = NULL;
7377 STMT_VINFO_DR_INIT (res) = NULL;
7378 STMT_VINFO_DR_STEP (res) = NULL;
7379 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7381 if (gimple_code (stmt) == GIMPLE_PHI
7382 && is_loop_header_bb_p (gimple_bb (stmt)))
7383 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7384 else
7385 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7387 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7388 STMT_SLP_TYPE (res) = loop_vect;
7389 GROUP_FIRST_ELEMENT (res) = NULL;
7390 GROUP_NEXT_ELEMENT (res) = NULL;
7391 GROUP_SIZE (res) = 0;
7392 GROUP_STORE_COUNT (res) = 0;
7393 GROUP_GAP (res) = 0;
7394 GROUP_SAME_DR_STMT (res) = NULL;
7396 return res;
7400 /* Create the global vector for stmt_vec_info structs. */
7402 void
7403 init_stmt_vec_info_vec (void)
7405 gcc_assert (!stmt_vec_info_vec.exists ());
7406 stmt_vec_info_vec.create (50);
7410 /* Free the global stmt_vec_info vector. */
7412 void
7413 free_stmt_vec_info_vec (void)
7415 unsigned int i;
7416 vec_void_p info;
7417 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7418 if (info != NULL)
7419 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7420 gcc_assert (stmt_vec_info_vec.exists ());
7421 stmt_vec_info_vec.release ();
7425 /* Free stmt vectorization related info. */
7427 void
7428 free_stmt_vec_info (gimple stmt)
7430 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7432 if (!stmt_info)
7433 return;
7435 /* Check if this statement has a related "pattern stmt"
7436 (introduced by the vectorizer during the pattern recognition
7437 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7438 too. */
7439 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7441 stmt_vec_info patt_info
7442 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7443 if (patt_info)
7445 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7446 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7447 gimple_set_bb (patt_stmt, NULL);
7448 tree lhs = gimple_get_lhs (patt_stmt);
7449 if (TREE_CODE (lhs) == SSA_NAME)
7450 release_ssa_name (lhs);
7451 if (seq)
7453 gimple_stmt_iterator si;
7454 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7456 gimple seq_stmt = gsi_stmt (si);
7457 gimple_set_bb (seq_stmt, NULL);
7458 lhs = gimple_get_lhs (seq_stmt);
7459 if (TREE_CODE (lhs) == SSA_NAME)
7460 release_ssa_name (lhs);
7461 free_stmt_vec_info (seq_stmt);
7464 free_stmt_vec_info (patt_stmt);
7468 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7469 set_vinfo_for_stmt (stmt, NULL);
7470 free (stmt_info);
7474 /* Function get_vectype_for_scalar_type_and_size.
7476 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7477 by the target. */
7479 static tree
7480 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7482 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
7483 enum machine_mode simd_mode;
7484 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7485 int nunits;
7486 tree vectype;
7488 if (nbytes == 0)
7489 return NULL_TREE;
7491 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7492 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7493 return NULL_TREE;
7495 /* For vector types of elements whose mode precision doesn't
7496 match their type's precision we use an element type of mode
7497 precision. The vectorization routines will have to make sure
7498 they support the proper result truncation/extension.
7499 We also make sure to build vector types with INTEGER_TYPE
7500 component type only. */
7501 if (INTEGRAL_TYPE_P (scalar_type)
7502 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7503 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7504 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7505 TYPE_UNSIGNED (scalar_type));
7507 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7508 When the component mode passes the above test simply use a type
7509 corresponding to that mode. The theory is that any use that
7510 would cause problems with this will disable vectorization anyway. */
7511 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7512 && !INTEGRAL_TYPE_P (scalar_type))
7513 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7515 /* We can't build a vector type of elements with alignment bigger than
7516 their size. */
7517 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7518 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7519 TYPE_UNSIGNED (scalar_type));
7521 /* If we fell back to using the mode, fail if there was
7522 no scalar type for it. */
7523 if (scalar_type == NULL_TREE)
7524 return NULL_TREE;
7526 /* If no size was supplied use the mode the target prefers. Otherwise
7527 lookup a vector mode of the specified size. */
7528 if (size == 0)
7529 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7530 else
7531 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7532 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7533 if (nunits <= 1)
7534 return NULL_TREE;
7536 vectype = build_vector_type (scalar_type, nunits);
7538 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7539 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7540 return NULL_TREE;
7542 return vectype;
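/* Illustrative example (assumed target parameters, not from the
 surrounding code): with SCALAR_TYPE == int (SImode, 4 bytes) and
 SIZE == 16, mode_for_vector yields V4SImode, so nunits == 4 and the
 function above returns vector(4) int; with SIZE == 0 the target's
 preferred SIMD mode for SImode is used instead. */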
7545 unsigned int current_vector_size;
7547 /* Function get_vectype_for_scalar_type.
7549 Returns the vector type corresponding to SCALAR_TYPE as supported
7550 by the target. */
7552 tree
7553 get_vectype_for_scalar_type (tree scalar_type)
7555 tree vectype;
7556 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7557 current_vector_size);
7558 if (vectype
7559 && current_vector_size == 0)
7560 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7561 return vectype;
7564 /* Function get_same_sized_vectype
7566 Returns a vector type corresponding to SCALAR_TYPE of size
7567 VECTOR_TYPE if supported by the target. */
7569 tree
7570 get_same_sized_vectype (tree scalar_type, tree vector_type)
7572 return get_vectype_for_scalar_type_and_size
7573 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7576 /* Function vect_is_simple_use.
7578 Input:
7579 LOOP_VINFO - the vect info of the loop that is being vectorized.
7580 BB_VINFO - the vect info of the basic block that is being vectorized.
7581 OPERAND - operand of STMT in the loop or bb.
7582 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7584 Returns whether a stmt with OPERAND can be vectorized.
7585 For loops, supportable operands are constants, loop invariants, and operands
7586 that are defined by the current iteration of the loop. Unsupportable
7587 operands are those that are defined by a previous iteration of the loop (as
7588 is the case in reduction/induction computations).
7589 For basic blocks, supportable operands are constants and bb invariants.
7590 For now, operands defined outside the basic block are not supported. */
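/* As a rough illustration (the loop below is a sketch, not taken from a
   particular testcase), given

     int x;                        (a function parameter)
     for (i = 0; i < n; i++)
       a[i] = b[i] * x + 5;

   the SSA name holding b[i] * x is a vect_internal_def (defined by the
   current iteration), x is a vect_external_def (its defining stmt is an
   empty stmt outside the loop), and 5 is a vect_constant_def.  In
   contrast, in s += a[i] the use of s is defined by the loop PHI node
   and is classified as a reduction definition.  */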
7592 bool
7593 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7594 bb_vec_info bb_vinfo, gimple *def_stmt,
7595 tree *def, enum vect_def_type *dt)
7597 basic_block bb;
7598 stmt_vec_info stmt_vinfo;
7599 struct loop *loop = NULL;
7601 if (loop_vinfo)
7602 loop = LOOP_VINFO_LOOP (loop_vinfo);
7604 *def_stmt = NULL;
7605 *def = NULL_TREE;
7607 if (dump_enabled_p ())
7609 dump_printf_loc (MSG_NOTE, vect_location,
7610 "vect_is_simple_use: operand ");
7611 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7612 dump_printf (MSG_NOTE, "\n");
7615 if (CONSTANT_CLASS_P (operand))
7617 *dt = vect_constant_def;
7618 return true;
7621 if (is_gimple_min_invariant (operand))
7623 *def = operand;
7624 *dt = vect_external_def;
7625 return true;
7628 if (TREE_CODE (operand) == PAREN_EXPR)
7630 if (dump_enabled_p ())
7631 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7632 operand = TREE_OPERAND (operand, 0);
7635 if (TREE_CODE (operand) != SSA_NAME)
7637 if (dump_enabled_p ())
7638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7639 "not ssa-name.\n");
7640 return false;
7643 *def_stmt = SSA_NAME_DEF_STMT (operand);
7644 if (*def_stmt == NULL)
7646 if (dump_enabled_p ())
7647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7648 "no def_stmt.\n");
7649 return false;
7652 if (dump_enabled_p ())
7654 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7656 dump_printf (MSG_NOTE, "\n");
7659 /* An empty stmt is expected only in the case of a function argument
7660 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7661 if (gimple_nop_p (*def_stmt))
7663 *def = operand;
7664 *dt = vect_external_def;
7665 return true;
7668 bb = gimple_bb (*def_stmt);
7670 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7671 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7672 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7673 *dt = vect_external_def;
7674 else
7676 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7677 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7680 if (*dt == vect_unknown_def_type
7681 || (stmt
7682 && *dt == vect_double_reduction_def
7683 && gimple_code (stmt) != GIMPLE_PHI))
7685 if (dump_enabled_p ())
7686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7687 "Unsupported pattern.\n");
7688 return false;
7691 if (dump_enabled_p ())
7692 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7694 switch (gimple_code (*def_stmt))
7696 case GIMPLE_PHI:
7697 *def = gimple_phi_result (*def_stmt);
7698 break;
7700 case GIMPLE_ASSIGN:
7701 *def = gimple_assign_lhs (*def_stmt);
7702 break;
7704 case GIMPLE_CALL:
7705 *def = gimple_call_lhs (*def_stmt);
7706 if (*def != NULL)
7707 break;
7708 /* FALLTHRU */
7709 default:
7710 if (dump_enabled_p ())
7711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7712 "unsupported defining stmt:\n");
7713 return false;
7716 return true;
7719 /* Function vect_is_simple_use_1.
7721 Same as vect_is_simple_use but also determines the vector operand
7722 type of OPERAND and stores it to *VECTYPE. If the definition of
7723 OPERAND is vect_uninitialized_def, vect_constant_def or
7724 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7725 is responsible for computing the best suited vector type for the
7726 scalar operand. */
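/* A condensed sketch of the typical calling pattern (names such as "op"
   and "vectype_out" are placeholders, not copied from a specific caller):

     gimple def_stmt;
     tree def, vectype;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_same_sized_vectype (TREE_TYPE (op), vectype_out);

   i.e. for constant and external defs the caller chooses a vector type
   itself, typically one matching the size of the output vector type.  */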
7728 bool
7729 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7730 bb_vec_info bb_vinfo, gimple *def_stmt,
7731 tree *def, enum vect_def_type *dt, tree *vectype)
7733 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7734 def, dt))
7735 return false;
7737 /* Now get a vector type if the def is internal, otherwise supply
7738 NULL_TREE and leave it up to the caller to figure out a proper
7739 type for the use stmt. */
7740 if (*dt == vect_internal_def
7741 || *dt == vect_induction_def
7742 || *dt == vect_reduction_def
7743 || *dt == vect_double_reduction_def
7744 || *dt == vect_nested_cycle)
7746 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7748 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7749 && !STMT_VINFO_RELEVANT (stmt_info)
7750 && !STMT_VINFO_LIVE_P (stmt_info))
7751 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7753 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7754 gcc_assert (*vectype != NULL_TREE);
7756 else if (*dt == vect_uninitialized_def
7757 || *dt == vect_constant_def
7758 || *dt == vect_external_def)
7759 *vectype = NULL_TREE;
7760 else
7761 gcc_unreachable ();
7763 return true;
7767 /* Function supportable_widening_operation
7769 Check whether an operation represented by the code CODE is a
7770 widening operation that is supported by the target platform in
7771 vector form (i.e., when operating on arguments of type VECTYPE_IN
7772 producing a result of type VECTYPE_OUT).
7774 Widening operations we currently support are NOP (CONVERT), FLOAT,
7775 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these are supported
7776 by the target platform either directly (via vector tree-codes), or via
7777 target builtins.
7779 Output:
7780 - CODE1 and CODE2 are codes of vector operations to be used when
7781 vectorizing the operation, if available.
7782 - MULTI_STEP_CVT determines the number of required intermediate steps in
7783 case of multi-step conversion (like char->short->int - in that case
7784 MULTI_STEP_CVT will be 1).
7785 - INTERM_TYPES contains the intermediate type required to perform the
7786 widening operation (short in the above example). */
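/* As a concrete illustration (a sketch assuming 128-bit vector registers;
   details vary by target), the classic widening case is

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   where the pattern recognizer turns the multiply into a WIDEN_MULT_EXPR
   with VECTYPE_IN = vector(8) short and VECTYPE_OUT = vector(4) int, and
   this function reports the VEC_WIDEN_MULT_LO/HI_EXPR pair (or the
   EVEN/ODD pair when the result feeds only a reduction, see below).  */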
7788 bool
7789 supportable_widening_operation (enum tree_code code, gimple stmt,
7790 tree vectype_out, tree vectype_in,
7791 enum tree_code *code1, enum tree_code *code2,
7792 int *multi_step_cvt,
7793 vec<tree> *interm_types)
7795 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7796 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7797 struct loop *vect_loop = NULL;
7798 enum machine_mode vec_mode;
7799 enum insn_code icode1, icode2;
7800 optab optab1, optab2;
7801 tree vectype = vectype_in;
7802 tree wide_vectype = vectype_out;
7803 enum tree_code c1, c2;
7804 int i;
7805 tree prev_type, intermediate_type;
7806 enum machine_mode intermediate_mode, prev_mode;
7807 optab optab3, optab4;
7809 *multi_step_cvt = 0;
7810 if (loop_info)
7811 vect_loop = LOOP_VINFO_LOOP (loop_info);
7813 switch (code)
7815 case WIDEN_MULT_EXPR:
7816 /* The result of a vectorized widening operation usually requires
7817 two vectors (because the widened results do not fit into one vector).
7818 The generated vector results would normally be expected to be
7819 generated in the same order as in the original scalar computation,
7820 i.e. if 8 results are generated in each vector iteration, they are
7821 to be organized as follows:
7822 vect1: [res1,res2,res3,res4],
7823 vect2: [res5,res6,res7,res8].
7825 However, in the special case that the result of the widening
7826 operation is used in a reduction computation only, the order doesn't
7827 matter (because when vectorizing a reduction we change the order of
7828 the computation). Some targets can take advantage of this and
7829 generate more efficient code. For example, targets like Altivec,
7830 that support widen_mult using a sequence of {mult_even,mult_odd}
7831 generate the following vectors:
7832 vect1: [res1,res3,res5,res7],
7833 vect2: [res2,res4,res6,res8].
7835 When vectorizing outer-loops, we execute the inner-loop sequentially
7836 (each vectorized inner-loop iteration contributes to VF outer-loop
7837 iterations in parallel). We therefore don't allow changing the
7838 order of the computation in the inner-loop during outer-loop
7839 vectorization. */
7840 /* TODO: Another case in which order doesn't *really* matter is when we
7841 widen and then contract again, e.g. (short)((int)x * y >> 8).
7842 Normally, pack_trunc performs an even/odd permute, whereas the
7843 repack from an even/odd expansion would be an interleave, which
7844 would be significantly simpler for e.g. AVX2. */
7845 /* In any case, in order to avoid duplicating the code below, recurse
7846 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7847 are properly set up for the caller. If we fail, we'll continue with
7848 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7849 if (vect_loop
7850 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7851 && !nested_in_vect_loop_p (vect_loop, stmt)
7852 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7853 stmt, vectype_out, vectype_in,
7854 code1, code2, multi_step_cvt,
7855 interm_types))
7857 /* Elements in a vector with the vect_used_by_reduction property cannot
7858 be reordered if the use chain with this property does not have the
7859 same operation. One such example is s += a * b, where elements
7860 in a and b cannot be reordered. Here we check if the vector defined
7861 by STMT is only directly used in the reduction statement. */
7862 tree lhs = gimple_assign_lhs (stmt);
7863 use_operand_p dummy;
7864 gimple use_stmt;
7865 stmt_vec_info use_stmt_info = NULL;
7866 if (single_imm_use (lhs, &dummy, &use_stmt)
7867 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7868 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7869 return true;
7871 c1 = VEC_WIDEN_MULT_LO_EXPR;
7872 c2 = VEC_WIDEN_MULT_HI_EXPR;
7873 break;
7875 case VEC_WIDEN_MULT_EVEN_EXPR:
7876 /* Support the recursion induced just above. */
7877 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7878 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7879 break;
7881 case WIDEN_LSHIFT_EXPR:
7882 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7883 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7884 break;
7886 CASE_CONVERT:
7887 c1 = VEC_UNPACK_LO_EXPR;
7888 c2 = VEC_UNPACK_HI_EXPR;
7889 break;
7891 case FLOAT_EXPR:
7892 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7893 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7894 break;
7896 case FIX_TRUNC_EXPR:
7897 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7898 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7899 computing the operation. */
7900 return false;
7902 default:
7903 gcc_unreachable ();
7906 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7908 enum tree_code ctmp = c1;
7909 c1 = c2;
7910 c2 = ctmp;
7913 if (code == FIX_TRUNC_EXPR)
7915 /* The signedness is determined from the output operand. */
7916 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7917 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7919 else
7921 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7922 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7925 if (!optab1 || !optab2)
7926 return false;
7928 vec_mode = TYPE_MODE (vectype);
7929 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7930 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7931 return false;
7933 *code1 = c1;
7934 *code2 = c2;
7936 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7937 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7938 return true;
7940 /* Check if it's a multi-step conversion that can be done using intermediate
7941 types. */
7943 prev_type = vectype;
7944 prev_mode = vec_mode;
7946 if (!CONVERT_EXPR_CODE_P (code))
7947 return false;
7949 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7950 intermediate steps in the promotion sequence. We try
7951 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7952 not. */
7953 interm_types->create (MAX_INTERM_CVT_STEPS);
7954 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7956 intermediate_mode = insn_data[icode1].operand[0].mode;
7957 intermediate_type
7958 = lang_hooks.types.type_for_mode (intermediate_mode,
7959 TYPE_UNSIGNED (prev_type));
7960 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7961 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7963 if (!optab3 || !optab4
7964 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7965 || insn_data[icode1].operand[0].mode != intermediate_mode
7966 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7967 || insn_data[icode2].operand[0].mode != intermediate_mode
7968 || ((icode1 = optab_handler (optab3, intermediate_mode))
7969 == CODE_FOR_nothing)
7970 || ((icode2 = optab_handler (optab4, intermediate_mode))
7971 == CODE_FOR_nothing))
7972 break;
7974 interm_types->quick_push (intermediate_type);
7975 (*multi_step_cvt)++;
7977 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7978 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7979 return true;
7981 prev_type = intermediate_type;
7982 prev_mode = intermediate_mode;
7985 interm_types->release ();
7986 return false;
7990 /* Function supportable_narrowing_operation
7992 Check whether an operation represented by the code CODE is a
7993 narrowing operation that is supported by the target platform in
7994 vector form (i.e., when operating on arguments of type VECTYPE_IN
7995 and producing a result of type VECTYPE_OUT).
7997 Narrowing operations we currently support are NOP (CONVERT) and
7998 FIX_TRUNC. This function checks if these operations are supported by
7999 the target platform directly via vector tree-codes.
8001 Output:
8002 - CODE1 is the code of a vector operation to be used when
8003 vectorizing the operation, if available.
8004 - MULTI_STEP_CVT determines the number of required intermediate steps in
8005 case of multi-step conversion (like int->short->char - in that case
8006 MULTI_STEP_CVT will be 1).
8007 - INTERM_TYPES contains the intermediate type required to perform the
8008 narrowing operation (short in the above example). */
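/* As a concrete illustration (a sketch assuming 128-bit vector registers),
   the conversion in

     int a[N];
     char b[N];
     for (i = 0; i < N; i++)
       b[i] = a[i];

   narrows vector(4) int to vector(16) char and needs one intermediate
   step through vector(8) short, so *CODE1 is VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type.  */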
8010 bool
8011 supportable_narrowing_operation (enum tree_code code,
8012 tree vectype_out, tree vectype_in,
8013 enum tree_code *code1, int *multi_step_cvt,
8014 vec<tree> *interm_types)
8016 enum machine_mode vec_mode;
8017 enum insn_code icode1;
8018 optab optab1, interm_optab;
8019 tree vectype = vectype_in;
8020 tree narrow_vectype = vectype_out;
8021 enum tree_code c1;
8022 tree intermediate_type;
8023 enum machine_mode intermediate_mode, prev_mode;
8024 int i;
8025 bool uns;
8027 *multi_step_cvt = 0;
8028 switch (code)
8030 CASE_CONVERT:
8031 c1 = VEC_PACK_TRUNC_EXPR;
8032 break;
8034 case FIX_TRUNC_EXPR:
8035 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8036 break;
8038 case FLOAT_EXPR:
8039 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8040 tree code and optabs used for computing the operation. */
8041 return false;
8043 default:
8044 gcc_unreachable ();
8047 if (code == FIX_TRUNC_EXPR)
8048 /* The signedness is determined from the output operand. */
8049 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8050 else
8051 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8053 if (!optab1)
8054 return false;
8056 vec_mode = TYPE_MODE (vectype);
8057 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8058 return false;
8060 *code1 = c1;
8062 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8063 return true;
8065 /* Check if it's a multi-step conversion that can be done using intermediate
8066 types. */
8067 prev_mode = vec_mode;
8068 if (code == FIX_TRUNC_EXPR)
8069 uns = TYPE_UNSIGNED (vectype_out);
8070 else
8071 uns = TYPE_UNSIGNED (vectype);
8073 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8074 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8075 costly than signed. */
8076 if (code == FIX_TRUNC_EXPR && uns)
8078 enum insn_code icode2;
8080 intermediate_type
8081 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8082 interm_optab
8083 = optab_for_tree_code (c1, intermediate_type, optab_default);
8084 if (interm_optab != unknown_optab
8085 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8086 && insn_data[icode1].operand[0].mode
8087 == insn_data[icode2].operand[0].mode)
8089 uns = false;
8090 optab1 = interm_optab;
8091 icode1 = icode2;
8095 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8096 intermediate steps in the narrowing sequence. We try
8097 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8098 interm_types->create (MAX_INTERM_CVT_STEPS);
8099 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8101 intermediate_mode = insn_data[icode1].operand[0].mode;
8102 intermediate_type
8103 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8104 interm_optab
8105 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8106 optab_default);
8107 if (!interm_optab
8108 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8109 || insn_data[icode1].operand[0].mode != intermediate_mode
8110 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8111 == CODE_FOR_nothing))
8112 break;
8114 interm_types->quick_push (intermediate_type);
8115 (*multi_step_cvt)++;
8117 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8118 return true;
8120 prev_mode = intermediate_mode;
8121 optab1 = interm_optab;
8124 interm_types->release ();
8125 return false;