PR target/65604
[official-gcc.git] / gcc / tree-vect-stmts.c
blob fa4a364983a62496210b275d09045be5cb75ff50
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
57 tree
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
65 bool
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
71 struct loop* loop;
73 if (!loop_vinfo)
74 return false;
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
85 unsigned
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
90 if (body_cost_vec)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
95 misalign };
96 body_cost_vec->safe_push (si);
97 return (unsigned)
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
100 else
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple *new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple *new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple *pattern_stmt;
192 if (dump_enabled_p ())
194 dump_printf_loc (MSG_NOTE, vect_location,
195 "mark relevant %d, live %d: ", relevant, live_p);
196 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
199 /* If this stmt is an original stmt in a pattern, we might need to mark its
200 related pattern stmt instead of the original stmt. However, such stmts
201 may have their own uses that are not in any pattern; in such cases the
202 stmt itself should be marked. */
203 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
205 bool found = false;
206 if (!used_in_pattern)
208 imm_use_iterator imm_iter;
209 use_operand_p use_p;
210 gimple *use_stmt;
211 tree lhs;
212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
213 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
215 if (is_gimple_assign (stmt))
216 lhs = gimple_assign_lhs (stmt);
217 else
218 lhs = gimple_call_lhs (stmt);
220 /* This use is outside the pattern; if LHS has other uses that are
221 pattern uses, we should mark the stmt itself, and not the pattern
222 stmt. */
223 if (lhs && TREE_CODE (lhs) == SSA_NAME)
224 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
226 if (is_gimple_debug (USE_STMT (use_p)))
227 continue;
228 use_stmt = USE_STMT (use_p);
230 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
231 continue;
233 if (vinfo_for_stmt (use_stmt)
234 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
236 found = true;
237 break;
242 if (!found)
244 /* This is the last stmt in a sequence that was detected as a
245 pattern that can potentially be vectorized. Don't mark the stmt
246 as relevant/live because it's not going to be vectorized.
247 Instead mark the pattern-stmt that replaces it. */
249 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
251 if (dump_enabled_p ())
252 dump_printf_loc (MSG_NOTE, vect_location,
253 "last stmt in pattern. don't mark"
254 " relevant/live.\n");
255 stmt_info = vinfo_for_stmt (pattern_stmt);
256 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
257 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
258 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
259 stmt = pattern_stmt;
263 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
264 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
265 STMT_VINFO_RELEVANT (stmt_info) = relevant;
267 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
268 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
270 if (dump_enabled_p ())
271 dump_printf_loc (MSG_NOTE, vect_location,
272 "already marked relevant/live.\n");
273 return;
276 worklist->safe_push (stmt);
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - it is a control stmt in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 enum vect_relevant *relevant, bool *live_p)
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
302 *relevant = vect_unused_in_scope;
303 *live_p = false;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 return (*live_p || *relevant);
351 /* Function exist_non_indexing_operands_for_use_p
353 USE is one of the uses attached to STMT. Check if USE is
354 used in STMT for anything other than indexing an array. */
356 static bool
357 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
359 tree operand;
360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
362 /* USE corresponds to some operand in STMT. If there is no data
363 reference in STMT, then any operand that corresponds to USE
364 is not indexing an array. */
365 if (!STMT_VINFO_DATA_REF (stmt_info))
366 return true;
368 /* STMT has a data_ref. FORNOW this means that it is of one of
369 the following forms:
370 -1- ARRAY_REF = var
371 -2- var = ARRAY_REF
372 (This should have been verified in analyze_data_refs).
374 'var' in the second case corresponds to a def, not a use,
375 so USE cannot correspond to any operands that are not used
376 for array indexing.
378 Therefore, all we need to check is if STMT falls into the
379 first case, and whether var corresponds to USE. */
381 if (!gimple_assign_copy_p (stmt))
383 if (is_gimple_call (stmt)
384 && gimple_call_internal_p (stmt))
385 switch (gimple_call_internal_fn (stmt))
387 case IFN_MASK_STORE:
388 operand = gimple_call_arg (stmt, 3);
389 if (operand == use)
390 return true;
391 /* FALLTHRU */
392 case IFN_MASK_LOAD:
393 operand = gimple_call_arg (stmt, 2);
394 if (operand == use)
395 return true;
396 break;
397 default:
398 break;
400 return false;
403 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
404 return false;
405 operand = gimple_assign_rhs1 (stmt);
406 if (TREE_CODE (operand) != SSA_NAME)
407 return false;
409 if (operand == use)
410 return true;
412 return false;
417 Function process_use.
419 Inputs:
420 - a USE in STMT in a loop represented by LOOP_VINFO
421 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
422 that defined USE. This is done by calling mark_relevant and passing it
423 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
424 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
425 be performed.
427 Outputs:
428 Generally, LIVE_P and RELEVANT are used to define the liveness and
429 relevance info of the DEF_STMT of this USE:
430 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
431 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
432 Exceptions:
433 - case 1: If USE is used only for address computations (e.g. array indexing),
434 which does not need to be directly vectorized, then the liveness/relevance
435 of the respective DEF_STMT is left unchanged.
436 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
437 skip DEF_STMT because it has already been processed.
438 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
439 be modified accordingly.
441 Return true if everything is as expected. Return false otherwise. */
443 static bool
444 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
445 enum vect_relevant relevant, vec<gimple *> *worklist,
446 bool force)
448 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
449 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
450 stmt_vec_info dstmt_vinfo;
451 basic_block bb, def_bb;
452 gimple *def_stmt;
453 enum vect_def_type dt;
455 /* case 1: we are only interested in uses that need to be vectorized. Uses
456 that are used for address computation are not considered relevant. */
457 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
458 return true;
460 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
462 if (dump_enabled_p ())
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464 "not vectorized: unsupported use in stmt.\n");
465 return false;
468 if (!def_stmt || gimple_nop_p (def_stmt))
469 return true;
471 def_bb = gimple_bb (def_stmt);
472 if (!flow_bb_inside_loop_p (loop, def_bb))
474 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
476 return true;
479 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
480 DEF_STMT must have already been processed, because this should be the
481 only way that STMT, which is a reduction-phi, was put in the worklist,
482 as there should be no other uses for DEF_STMT in the loop. So we just
483 check that everything is as expected, and we are done. */
484 dstmt_vinfo = vinfo_for_stmt (def_stmt);
485 bb = gimple_bb (stmt);
486 if (gimple_code (stmt) == GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
488 && gimple_code (def_stmt) != GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
490 && bb->loop_father == def_bb->loop_father)
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE, vect_location,
494 "reduc-stmt defining reduc-phi in the same nest.\n");
495 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
496 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
497 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
498 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
499 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
500 return true;
503 /* case 3a: outer-loop stmt defining an inner-loop stmt:
504 outer-loop-header-bb:
505 d = def_stmt
506 inner-loop:
507 stmt # use (d)
508 outer-loop-tail-bb:
509 ... */
510 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE, vect_location,
514 "outer-loop def-stmt defining inner-loop stmt.\n");
516 switch (relevant)
518 case vect_unused_in_scope:
519 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
520 vect_used_in_scope : vect_unused_in_scope;
521 break;
523 case vect_used_in_outer_by_reduction:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 relevant = vect_used_by_reduction;
526 break;
528 case vect_used_in_outer:
529 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
530 relevant = vect_used_in_scope;
531 break;
533 case vect_used_in_scope:
534 break;
536 default:
537 gcc_unreachable ();
541 /* case 3b: inner-loop stmt defining an outer-loop stmt:
542 outer-loop-header-bb:
544 inner-loop:
545 d = def_stmt
546 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
547 stmt # use (d) */
548 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE, vect_location,
552 "inner-loop def-stmt defining outer-loop stmt.\n");
554 switch (relevant)
556 case vect_unused_in_scope:
557 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
558 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
559 vect_used_in_outer_by_reduction : vect_unused_in_scope;
560 break;
562 case vect_used_by_reduction:
563 relevant = vect_used_in_outer_by_reduction;
564 break;
566 case vect_used_in_scope:
567 relevant = vect_used_in_outer;
568 break;
570 default:
571 gcc_unreachable ();
575 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
576 is_pattern_stmt_p (stmt_vinfo));
577 return true;
581 /* Function vect_mark_stmts_to_be_vectorized.
583 Not all stmts in the loop need to be vectorized. For example:
585 for i...
586 for j...
587 1. T0 = i + j
588 2. T1 = a[T0]
590 3. j = j + 1
592 Stmt 1 and 3 do not need to be vectorized, because loop control and
593 addressing of vectorized data-refs are handled differently.
595 This pass detects such stmts. */
597 bool
598 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
601 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
602 unsigned int nbbs = loop->num_nodes;
603 gimple_stmt_iterator si;
604 gimple *stmt;
605 unsigned int i;
606 stmt_vec_info stmt_vinfo;
607 basic_block bb;
608 gimple *phi;
609 bool live_p;
610 enum vect_relevant relevant, tmp_relevant;
611 enum vect_def_type def_type;
613 if (dump_enabled_p ())
614 dump_printf_loc (MSG_NOTE, vect_location,
615 "=== vect_mark_stmts_to_be_vectorized ===\n");
617 auto_vec<gimple *, 64> worklist;
619 /* 1. Init worklist. */
620 for (i = 0; i < nbbs; i++)
622 bb = bbs[i];
623 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
625 phi = gsi_stmt (si);
626 if (dump_enabled_p ())
628 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
632 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
635 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
637 stmt = gsi_stmt (si);
638 if (dump_enabled_p ())
640 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
641 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
644 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt = worklist.pop ();
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant and live/dead according to the
664 liveness and relevance properties of STMT. */
665 stmt_vinfo = vinfo_for_stmt (stmt);
666 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
669 /* Generally, the liveness and relevance properties of STMT are
670 propagated as is to the DEF_STMTs of its USEs:
671 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
672 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
674 One exception is when STMT has been identified as defining a reduction
675 variable; in this case we set the liveness/relevance as follows:
676 live_p = false
677 relevant = vect_used_by_reduction
678 This is because we distinguish between two kinds of relevant stmts -
679 those that are used by a reduction computation, and those that are
680 (also) used by a regular computation. This allows us later on to
681 identify stmts that are used solely by a reduction, and therefore the
682 order of the results that they produce does not have to be kept. */
684 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
685 tmp_relevant = relevant;
686 switch (def_type)
688 case vect_reduction_def:
689 switch (tmp_relevant)
691 case vect_unused_in_scope:
692 relevant = vect_used_by_reduction;
693 break;
695 case vect_used_by_reduction:
696 if (gimple_code (stmt) == GIMPLE_PHI)
697 break;
698 /* fall through */
700 default:
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
703 "unsupported use of reduction.\n");
704 return false;
707 live_p = false;
708 break;
710 case vect_nested_cycle:
711 if (tmp_relevant != vect_unused_in_scope
712 && tmp_relevant != vect_used_in_outer_by_reduction
713 && tmp_relevant != vect_used_in_outer)
715 if (dump_enabled_p ())
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
717 "unsupported use of nested cycle.\n");
719 return false;
722 live_p = false;
723 break;
725 case vect_double_reduction_def:
726 if (tmp_relevant != vect_unused_in_scope
727 && tmp_relevant != vect_used_by_reduction)
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
731 "unsupported use of double reduction.\n");
733 return false;
736 live_p = false;
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (is_gimple_assign (stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
751 tree op = gimple_assign_rhs1 (stmt);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
757 live_p, relevant, &worklist, false)
758 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
759 live_p, relevant, &worklist, false))
760 return false;
761 i = 2;
763 for (; i < gimple_num_ops (stmt); i++)
765 op = gimple_op (stmt, i);
766 if (TREE_CODE (op) == SSA_NAME
767 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
768 &worklist, false))
769 return false;
772 else if (is_gimple_call (stmt))
774 for (i = 0; i < gimple_call_num_args (stmt); i++)
776 tree arg = gimple_call_arg (stmt, i);
777 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
778 &worklist, false))
779 return false;
783 else
784 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
786 tree op = USE_FROM_PTR (use_p);
787 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
788 &worklist, false))
789 return false;
792 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
794 tree off;
795 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
796 gcc_assert (decl);
797 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
798 &worklist, true))
799 return false;
801 } /* while worklist */
803 return true;
807 /* Function vect_model_simple_cost.
809 Models cost for simple operations, i.e. those that only emit ncopies of a
810 single op. Right now, this does not account for multiple insns that could
811 be generated for the single vector op. We will handle that shortly. */
813 void
814 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
815 enum vect_def_type *dt,
816 stmt_vector_for_cost *prologue_cost_vec,
817 stmt_vector_for_cost *body_cost_vec)
819 int i;
820 int inside_cost = 0, prologue_cost = 0;
822 /* The SLP costs were already calculated during SLP tree build. */
823 if (PURE_SLP_STMT (stmt_info))
824 return;
826 /* FORNOW: Assuming maximum 2 args per stmts. */
827 for (i = 0; i < 2; i++)
828 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
829 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
830 stmt_info, 0, vect_prologue);
832 /* Pass the inside-of-loop statements to the target-specific cost model. */
833 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
834 stmt_info, 0, vect_body);
836 if (dump_enabled_p ())
837 dump_printf_loc (MSG_NOTE, vect_location,
838 "vect_model_simple_cost: inside_cost = %d, "
839 "prologue_cost = %d .\n", inside_cost, prologue_cost);
843 /* Model cost for type demotion and promotion operations. PWR is normally
844 zero for single-step promotions and demotions. It will be one if
845 two-step promotion/demotion is required, and so on. Each additional
846 step doubles the number of instructions required. */
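/* As an illustration of the loop below: a two-step promotion (PWR = 1)
accumulates vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
statements, while a two-step demotion accumulates
vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3. */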
848 static void
849 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
850 enum vect_def_type *dt, int pwr)
852 int i, tmp;
853 int inside_cost = 0, prologue_cost = 0;
854 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
855 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
856 void *target_cost_data;
858 /* The SLP costs were already calculated during SLP tree build. */
859 if (PURE_SLP_STMT (stmt_info))
860 return;
862 if (loop_vinfo)
863 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
864 else
865 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
867 for (i = 0; i < pwr + 1; i++)
869 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
870 (i + 1) : i;
871 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
872 vec_promote_demote, stmt_info, 0,
873 vect_body);
876 /* FORNOW: Assuming maximum 2 args per stmts. */
877 for (i = 0; i < 2; i++)
878 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
879 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
880 stmt_info, 0, vect_prologue);
882 if (dump_enabled_p ())
883 dump_printf_loc (MSG_NOTE, vect_location,
884 "vect_model_promotion_demotion_cost: inside_cost = %d, "
885 "prologue_cost = %d .\n", inside_cost, prologue_cost);
888 /* Function vect_cost_group_size
890 For grouped load or store, return the group_size only if it is the first
891 load or store of a group, else return 1. This ensures that group size is
892 only returned once per group. */
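/* For example, in a group of 4 stores only the first one reports a
group size of 4; the other 3 report 1, so the group overhead is
accounted for exactly once. */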
894 static int
895 vect_cost_group_size (stmt_vec_info stmt_info)
897 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
899 if (first_stmt == STMT_VINFO_STMT (stmt_info))
900 return GROUP_SIZE (stmt_info);
902 return 1;
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
911 void
912 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
913 bool store_lanes_p, enum vect_def_type dt,
914 slp_tree slp_node,
915 stmt_vector_for_cost *prologue_cost_vec,
916 stmt_vector_for_cost *body_cost_vec)
918 int group_size;
919 unsigned int inside_cost = 0, prologue_cost = 0;
920 struct data_reference *first_dr;
921 gimple *first_stmt;
923 if (dt == vect_constant_def || dt == vect_external_def)
924 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
925 stmt_info, 0, vect_prologue);
927 /* Grouped access? */
928 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
930 if (slp_node)
932 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
933 group_size = 1;
935 else
937 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938 group_size = vect_cost_group_size (stmt_info);
941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
943 /* Not a grouped access. */
944 else
946 group_size = 1;
947 first_dr = STMT_VINFO_DATA_REF (stmt_info);
950 /* We assume that the cost of a single store-lanes instruction is
951 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
952 access is instead being provided by a permute-and-store operation,
953 include the cost of the permutes. */
954 if (!store_lanes_p && group_size > 1
955 && !STMT_VINFO_STRIDED_P (stmt_info))
957 /* Uses high and low interleave or shuffle operations for each
958 needed permute. */
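/* For example, with GROUP_SIZE = 4 and NCOPIES = 1 this is
1 * ceil_log2 (4) * 4 = 8 permute statements. */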
959 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
960 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
961 stmt_info, 0, vect_body);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "vect_model_store_cost: strided group_size = %d .\n",
966 group_size);
969 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
970 /* Costs of the stores. */
971 if (STMT_VINFO_STRIDED_P (stmt_info)
972 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
974 /* N scalar stores plus extracting the elements. */
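/* For example, NCOPIES = 2 stores of a V4SI vector cost 2 * 4 = 8
scalar_store entries here; the matching vec_to_scalar extracts are
added further below. */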
975 inside_cost += record_stmt_cost (body_cost_vec,
976 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
977 scalar_store, stmt_info, 0, vect_body);
979 else
980 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
982 if (STMT_VINFO_STRIDED_P (stmt_info))
983 inside_cost += record_stmt_cost (body_cost_vec,
984 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
985 vec_to_scalar, stmt_info, 0, vect_body);
987 if (dump_enabled_p ())
988 dump_printf_loc (MSG_NOTE, vect_location,
989 "vect_model_store_cost: inside_cost = %d, "
990 "prologue_cost = %d .\n", inside_cost, prologue_cost);
994 /* Calculate cost of DR's memory access. */
995 void
996 vect_get_store_cost (struct data_reference *dr, int ncopies,
997 unsigned int *inside_cost,
998 stmt_vector_for_cost *body_cost_vec)
1000 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1001 gimple *stmt = DR_STMT (dr);
1002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1004 switch (alignment_support_scheme)
1006 case dr_aligned:
1008 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 vector_store, stmt_info, 0,
1010 vect_body);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE, vect_location,
1014 "vect_model_store_cost: aligned.\n");
1015 break;
1018 case dr_unaligned_supported:
1020 /* Here, we assign an additional cost for the unaligned store. */
1021 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1022 unaligned_store, stmt_info,
1023 DR_MISALIGNMENT (dr), vect_body);
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_NOTE, vect_location,
1026 "vect_model_store_cost: unaligned supported by "
1027 "hardware.\n");
1028 break;
1031 case dr_unaligned_unsupported:
1033 *inside_cost = VECT_MAX_COST;
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1037 "vect_model_store_cost: unsupported access.\n");
1038 break;
1041 default:
1042 gcc_unreachable ();
1047 /* Function vect_model_load_cost
1049 Models cost for loads. In the case of grouped accesses, the last access
1050 has the overhead of the grouped access attributed to it. Since unaligned
1051 accesses are supported for loads, we also account for the costs of the
1052 access scheme chosen. */
1054 void
1055 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1056 bool load_lanes_p, slp_tree slp_node,
1057 stmt_vector_for_cost *prologue_cost_vec,
1058 stmt_vector_for_cost *body_cost_vec)
1060 int group_size;
1061 gimple *first_stmt;
1062 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1063 unsigned int inside_cost = 0, prologue_cost = 0;
1065 /* Grouped accesses? */
1066 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1067 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1069 group_size = vect_cost_group_size (stmt_info);
1070 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1072 /* Not a grouped access. */
1073 else
1075 group_size = 1;
1076 first_dr = dr;
1079 /* We assume that the cost of a single load-lanes instruction is
1080 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1081 access is instead being provided by a load-and-permute operation,
1082 include the cost of the permutes. */
1083 if (!load_lanes_p && group_size > 1
1084 && !STMT_VINFO_STRIDED_P (stmt_info))
1086 /* Uses even and odd extract operations or shuffle operations
1087 for each needed permute. */
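/* For example, with GROUP_SIZE = 8 and NCOPIES = 1 this is
1 * ceil_log2 (8) * 8 = 24 permute statements. */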
1088 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1089 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: strided group_size = %d .\n",
1095 group_size);
1098 /* The loads themselves. */
1099 if (STMT_VINFO_STRIDED_P (stmt_info)
1100 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1102 /* N scalar loads plus gathering them into a vector. */
1103 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1104 inside_cost += record_stmt_cost (body_cost_vec,
1105 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1106 scalar_load, stmt_info, 0, vect_body);
1108 else
1109 vect_get_load_cost (first_dr, ncopies,
1110 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1111 || group_size > 1 || slp_node),
1112 &inside_cost, &prologue_cost,
1113 prologue_cost_vec, body_cost_vec, true);
1114 if (STMT_VINFO_STRIDED_P (stmt_info))
1115 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1116 stmt_info, 0, vect_body);
1118 if (dump_enabled_p ())
1119 dump_printf_loc (MSG_NOTE, vect_location,
1120 "vect_model_load_cost: inside_cost = %d, "
1121 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1125 /* Calculate cost of DR's memory access. */
1126 void
1127 vect_get_load_cost (struct data_reference *dr, int ncopies,
1128 bool add_realign_cost, unsigned int *inside_cost,
1129 unsigned int *prologue_cost,
1130 stmt_vector_for_cost *prologue_cost_vec,
1131 stmt_vector_for_cost *body_cost_vec,
1132 bool record_prologue_costs)
1134 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1135 gimple *stmt = DR_STMT (dr);
1136 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1138 switch (alignment_support_scheme)
1140 case dr_aligned:
1142 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1143 stmt_info, 0, vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: aligned.\n");
1149 break;
1151 case dr_unaligned_supported:
1153 /* Here, we assign an additional cost for the unaligned load. */
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1155 unaligned_load, stmt_info,
1156 DR_MISALIGNMENT (dr), vect_body);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "vect_model_load_cost: unaligned supported by "
1161 "hardware.\n");
1163 break;
1165 case dr_explicit_realign:
1167 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1168 vector_load, stmt_info, 0, vect_body);
1169 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1170 vec_perm, stmt_info, 0, vect_body);
1172 /* FIXME: If the misalignment remains fixed across the iterations of
1173 the containing loop, the following cost should be added to the
1174 prologue costs. */
1175 if (targetm.vectorize.builtin_mask_for_load)
1176 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1177 stmt_info, 0, vect_body);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_model_load_cost: explicit realign\n");
1183 break;
1185 case dr_explicit_realign_optimized:
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: unaligned software "
1190 "pipelined.\n");
1192 /* Unaligned software pipeline has a load of an address, an initial
1193 load, and possibly a mask operation to "prime" the loop. However,
1194 if this is an access in a group of loads, which provide grouped
1195 access, then the above cost should only be considered for one
1196 access in the group. Inside the loop, there is a load op
1197 and a realignment op. */
1199 if (add_realign_cost && record_prologue_costs)
1201 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1202 vector_stmt, stmt_info,
1203 0, vect_prologue);
1204 if (targetm.vectorize.builtin_mask_for_load)
1205 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1206 vector_stmt, stmt_info,
1207 0, vect_prologue);
1210 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1211 stmt_info, 0, vect_body);
1212 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1213 stmt_info, 0, vect_body);
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE, vect_location,
1217 "vect_model_load_cost: explicit realign optimized"
1218 "\n");
1220 break;
1223 case dr_unaligned_unsupported:
1225 *inside_cost = VECT_MAX_COST;
1227 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1229 "vect_model_load_cost: unsupported access.\n");
1230 break;
1233 default:
1234 gcc_unreachable ();
1238 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1239 the loop preheader for the vectorized stmt STMT. */
1241 static void
1242 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1244 if (gsi)
1245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1246 else
1248 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1249 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1251 if (loop_vinfo)
1253 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1254 basic_block new_bb;
1255 edge pe;
1257 if (nested_in_vect_loop_p (loop, stmt))
1258 loop = loop->inner;
1260 pe = loop_preheader_edge (loop);
1261 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1262 gcc_assert (!new_bb);
1264 else
1266 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1267 basic_block bb;
1268 gimple_stmt_iterator gsi_bb_start;
1270 gcc_assert (bb_vinfo);
1271 bb = BB_VINFO_BB (bb_vinfo);
1272 gsi_bb_start = gsi_after_labels (bb);
1273 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1277 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE, vect_location,
1280 "created new init_stmt: ");
1281 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1285 /* Function vect_init_vector.
1287 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1288 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1289 a vector type, a vector with all elements equal to VAL is created first.
1290 Place the initialization at BSI if it is not NULL. Otherwise, place the
1291 initialization at the loop preheader.
1292 Return the DEF of INIT_STMT.
1293 It will be used in the vectorization of STMT. */
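/* For example, for a V4SI TYPE and a scalar VAL of 5, the code below
builds an INIT_STMT of the form "cst_N = { 5, 5, 5, 5 }" and returns
the SSA name cst_N (the "cst_" prefix comes from vect_get_new_ssa_name). */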
1295 tree
1296 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1298 gimple *init_stmt;
1299 tree new_temp;
1301 if (TREE_CODE (type) == VECTOR_TYPE
1302 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1304 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1306 /* Scalar boolean value should be transformed into
1307 all zeros or all ones value before building a vector. */
1308 if (VECTOR_BOOLEAN_TYPE_P (type))
1310 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1311 tree false_val = build_zero_cst (TREE_TYPE (type));
1313 if (CONSTANT_CLASS_P (val))
1314 val = integer_zerop (val) ? false_val : true_val;
1315 else
1317 new_temp = make_ssa_name (TREE_TYPE (type));
1318 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1319 val, true_val, false_val);
1320 vect_init_vector_1 (stmt, init_stmt, gsi);
1321 val = new_temp;
1324 else if (CONSTANT_CLASS_P (val))
1325 val = fold_convert (TREE_TYPE (type), val);
1326 else
1328 new_temp = make_ssa_name (TREE_TYPE (type));
1329 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1330 vect_init_vector_1 (stmt, init_stmt, gsi);
1331 val = new_temp;
1334 val = build_vector_from_val (type, val);
1337 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1338 init_stmt = gimple_build_assign (new_temp, val);
1339 vect_init_vector_1 (stmt, init_stmt, gsi);
1340 return new_temp;
1344 /* Function vect_get_vec_def_for_operand.
1346 OP is an operand in STMT. This function returns a (vector) def that will be
1347 used in the vectorized stmt for STMT.
1349 In the case that OP is an SSA_NAME which is defined in the loop, then
1350 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1352 In case OP is an invariant or constant, a new stmt that creates a vector def
1353 needs to be introduced. VECTYPE may be used to specify a required type for
1354 vector invariant. */
1356 tree
1357 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1359 tree vec_oprnd;
1360 gimple *vec_stmt;
1361 gimple *def_stmt;
1362 stmt_vec_info def_stmt_info = NULL;
1363 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1364 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1366 enum vect_def_type dt;
1367 bool is_simple_use;
1368 tree vector_type;
1370 if (dump_enabled_p ())
1372 dump_printf_loc (MSG_NOTE, vect_location,
1373 "vect_get_vec_def_for_operand: ");
1374 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1375 dump_printf (MSG_NOTE, "\n");
1378 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1379 gcc_assert (is_simple_use);
1380 if (dump_enabled_p ())
1382 int loc_printed = 0;
1383 if (def_stmt)
1385 if (loc_printed)
1386 dump_printf (MSG_NOTE, " def_stmt = ");
1387 else
1388 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1389 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1393 switch (dt)
1395 /* operand is a constant or a loop invariant. */
1396 case vect_constant_def:
1397 case vect_external_def:
1399 if (vectype)
1400 vector_type = vectype;
1401 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1402 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1403 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1404 else
1405 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1407 gcc_assert (vector_type);
1408 return vect_init_vector (stmt, op, vector_type, NULL);
1411 /* operand is defined inside the loop. */
1412 case vect_internal_def:
1414 /* Get the def from the vectorized stmt. */
1415 def_stmt_info = vinfo_for_stmt (def_stmt);
1417 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1418 /* Get vectorized pattern statement. */
1419 if (!vec_stmt
1420 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1421 && !STMT_VINFO_RELEVANT (def_stmt_info))
1422 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1423 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1424 gcc_assert (vec_stmt);
1425 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1426 vec_oprnd = PHI_RESULT (vec_stmt);
1427 else if (is_gimple_call (vec_stmt))
1428 vec_oprnd = gimple_call_lhs (vec_stmt);
1429 else
1430 vec_oprnd = gimple_assign_lhs (vec_stmt);
1431 return vec_oprnd;
1434 /* operand is defined by a loop header phi - reduction */
1435 case vect_reduction_def:
1436 case vect_double_reduction_def:
1437 case vect_nested_cycle:
1438 /* Code should use get_initial_def_for_reduction. */
1439 gcc_unreachable ();
1441 /* operand is defined by loop-header phi - induction. */
1442 case vect_induction_def:
1444 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1446 /* Get the def from the vectorized stmt. */
1447 def_stmt_info = vinfo_for_stmt (def_stmt);
1448 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1449 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1450 vec_oprnd = PHI_RESULT (vec_stmt);
1451 else
1452 vec_oprnd = gimple_get_lhs (vec_stmt);
1453 return vec_oprnd;
1456 default:
1457 gcc_unreachable ();
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 static void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1.
1564 REDUC_INDEX is the index of reduction operand in case of reduction,
1565 and -1 otherwise. */
1567 void
1568 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1569 vec<tree> *vec_oprnds0,
1570 vec<tree> *vec_oprnds1,
1571 slp_tree slp_node, int reduc_index)
1573 if (slp_node)
1575 int nops = (op1 == NULL_TREE) ? 1 : 2;
1576 auto_vec<tree> ops (nops);
1577 auto_vec<vec<tree> > vec_defs (nops);
1579 ops.quick_push (op0);
1580 if (op1)
1581 ops.quick_push (op1);
1583 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1585 *vec_oprnds0 = vec_defs[0];
1586 if (op1)
1587 *vec_oprnds1 = vec_defs[1];
1589 else
1591 tree vec_oprnd;
1593 vec_oprnds0->create (1);
1594 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1595 vec_oprnds0->quick_push (vec_oprnd);
1597 if (op1)
1599 vec_oprnds1->create (1);
1600 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1601 vec_oprnds1->quick_push (vec_oprnd);
1607 /* Function vect_finish_stmt_generation.
1609 Insert a new stmt. */
1611 void
1612 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1613 gimple_stmt_iterator *gsi)
1615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1616 vec_info *vinfo = stmt_info->vinfo;
1618 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1620 if (!gsi_end_p (*gsi)
1621 && gimple_has_mem_ops (vec_stmt))
1623 gimple *at_stmt = gsi_stmt (*gsi);
1624 tree vuse = gimple_vuse (at_stmt);
1625 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1627 tree vdef = gimple_vdef (at_stmt);
1628 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1629 /* If we have an SSA vuse and insert a store, update virtual
1630 SSA form to avoid triggering the renamer. Do so only
1631 if we can easily see all uses - which is what almost always
1632 happens with the way vectorized stmts are inserted. */
1633 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1634 && ((is_gimple_assign (vec_stmt)
1635 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1636 || (is_gimple_call (vec_stmt)
1637 && !(gimple_call_flags (vec_stmt)
1638 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1640 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1641 gimple_set_vdef (vec_stmt, new_vdef);
1642 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1646 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1648 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1650 if (dump_enabled_p ())
1652 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1656 gimple_set_location (vec_stmt, gimple_location (stmt));
1658 /* While EH edges will generally prevent vectorization, stmt might
1659 e.g. be in a must-not-throw region. Ensure newly created stmts
1660 that could throw are part of the same region. */
1661 int lp_nr = lookup_stmt_eh_lp (stmt);
1662 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1663 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1666 /* We want to vectorize a call to combined function CFN with function
1667 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1668 as the types of all inputs. Check whether this is possible using
1669 an internal function, returning its code if so or IFN_LAST if not. */
1671 static internal_fn
1672 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1673 tree vectype_out, tree vectype_in)
1675 internal_fn ifn;
1676 if (internal_fn_p (cfn))
1677 ifn = as_internal_fn (cfn);
1678 else
1679 ifn = associated_internal_fn (fndecl);
1680 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1682 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1683 if (info.vectorizable)
1685 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1686 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1687 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1688 OPTIMIZE_FOR_SPEED))
1689 return ifn;
1692 return IFN_LAST;
1696 static tree permute_vec_elements (tree, tree, tree, gimple *,
1697 gimple_stmt_iterator *);
1700 /* Function vectorizable_mask_load_store.
1702 Check if STMT performs a conditional load or store that can be vectorized.
1703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1704 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1705 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1707 static bool
1708 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1709 gimple **vec_stmt, slp_tree slp_node)
1711 tree vec_dest = NULL;
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1713 stmt_vec_info prev_stmt_info;
1714 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1715 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1716 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1717 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1718 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1719 tree rhs_vectype = NULL_TREE;
1720 tree mask_vectype;
1721 tree elem_type;
1722 gimple *new_stmt;
1723 tree dummy;
1724 tree dataref_ptr = NULL_TREE;
1725 gimple *ptr_incr;
1726 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1727 int ncopies;
1728 int i, j;
1729 bool inv_p;
1730 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1731 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1732 int gather_scale = 1;
1733 enum vect_def_type gather_dt = vect_unknown_def_type;
1734 bool is_store;
1735 tree mask;
1736 gimple *def_stmt;
1737 enum vect_def_type dt;
1739 if (slp_node != NULL)
1740 return false;
1742 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1743 gcc_assert (ncopies >= 1);
1745 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1746 mask = gimple_call_arg (stmt, 2);
1748 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1749 return false;
1751 /* FORNOW. This restriction should be relaxed. */
1752 if (nested_in_vect_loop && ncopies > 1)
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1756 "multiple types in nested loop.");
1757 return false;
1760 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1761 return false;
1763 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1764 && ! vec_stmt)
1765 return false;
1767 if (!STMT_VINFO_DATA_REF (stmt_info))
1768 return false;
1770 elem_type = TREE_TYPE (vectype);
1772 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1773 return false;
1775 if (STMT_VINFO_STRIDED_P (stmt_info))
1776 return false;
1778 if (TREE_CODE (mask) != SSA_NAME)
1779 return false;
1781 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1782 return false;
1784 if (!mask_vectype)
1785 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1787 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
1788 return false;
1790 if (is_store)
1792 tree rhs = gimple_call_arg (stmt, 3);
1793 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1794 return false;
1797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1799 gimple *def_stmt;
1800 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1801 &gather_off, &gather_scale);
1802 gcc_assert (gather_decl);
1803 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1804 &gather_off_vectype))
1806 if (dump_enabled_p ())
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "gather index use not simple.");
1809 return false;
1812 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1813 tree masktype
1814 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1815 if (TREE_CODE (masktype) == INTEGER_TYPE)
1817 if (dump_enabled_p ())
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1819 "masked gather with integer mask not supported.");
1820 return false;
1823 else if (tree_int_cst_compare (nested_in_vect_loop
1824 ? STMT_VINFO_DR_STEP (stmt_info)
1825 : DR_STEP (dr), size_zero_node) <= 0)
1826 return false;
1827 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1828 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1829 TYPE_MODE (mask_vectype),
1830 !is_store)
1831 || (rhs_vectype
1832 && !useless_type_conversion_p (vectype, rhs_vectype)))
1833 return false;
1835 if (!vec_stmt) /* transformation not required. */
1837 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1838 if (is_store)
1839 vect_model_store_cost (stmt_info, ncopies, false, dt,
1840 NULL, NULL, NULL);
1841 else
1842 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1843 return true;
1846 /** Transform. **/
1848 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1850 tree vec_oprnd0 = NULL_TREE, op;
1851 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1853 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1854 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1855 tree mask_perm_mask = NULL_TREE;
1856 edge pe = loop_preheader_edge (loop);
1857 gimple_seq seq;
1858 basic_block new_bb;
1859 enum { NARROW, NONE, WIDEN } modifier;
1860 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1862 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1863 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1864 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1865 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1866 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1867 scaletype = TREE_VALUE (arglist);
1868 gcc_checking_assert (types_compatible_p (srctype, rettype)
1869 && types_compatible_p (srctype, masktype));
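/* The offset vector and the data vector need not have the same number of
   elements; the code below sets up the permutations this requires.  With
   WIDEN the offset vector is twice as wide, so one offset vector serves two
   copies and its high half is shuffled into the low positions for the odd
   copies.  With NARROW each gather fills only half of a data vector, so
   NCOPIES is doubled and consecutive results are merged by a permutation.  */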
1871 if (nunits == gather_off_nunits)
1872 modifier = NONE;
1873 else if (nunits == gather_off_nunits / 2)
1875 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1876 modifier = WIDEN;
1878 for (i = 0; i < gather_off_nunits; ++i)
1879 sel[i] = i | nunits;
1881 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1883 else if (nunits == gather_off_nunits * 2)
1885 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1886 modifier = NARROW;
1888 for (i = 0; i < nunits; ++i)
1889 sel[i] = i < gather_off_nunits
1890 ? i : i + nunits - gather_off_nunits;
1892 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1893 ncopies *= 2;
1894 for (i = 0; i < nunits; ++i)
1895 sel[i] = i | gather_off_nunits;
1896 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1898 else
1899 gcc_unreachable ();
1901 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1903 ptr = fold_convert (ptrtype, gather_base);
1904 if (!is_gimple_min_invariant (ptr))
1906 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1907 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1908 gcc_assert (!new_bb);
1911 scale = build_int_cst (scaletype, gather_scale);
1913 prev_stmt_info = NULL;
1914 for (j = 0; j < ncopies; ++j)
1916 if (modifier == WIDEN && (j & 1))
1917 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1918 perm_mask, stmt, gsi);
1919 else if (j == 0)
1920 op = vec_oprnd0
1921 = vect_get_vec_def_for_operand (gather_off, stmt);
1922 else
1923 op = vec_oprnd0
1924 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1926 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1928 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1929 == TYPE_VECTOR_SUBPARTS (idxtype));
1930 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1931 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1932 new_stmt
1933 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1935 op = var;
1938 if (mask_perm_mask && (j & 1))
1939 mask_op = permute_vec_elements (mask_op, mask_op,
1940 mask_perm_mask, stmt, gsi);
1941 else
1943 if (j == 0)
1944 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1945 else
1947 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1948 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1951 mask_op = vec_mask;
1952 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1955 == TYPE_VECTOR_SUBPARTS (masktype));
1956 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1957 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1958 new_stmt
1959 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1961 mask_op = var;
1965 new_stmt
1966 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1967 scale);
1969 if (!useless_type_conversion_p (vectype, rettype))
1971 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1972 == TYPE_VECTOR_SUBPARTS (rettype));
1973 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1974 gimple_call_set_lhs (new_stmt, op);
1975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1976 var = make_ssa_name (vec_dest);
1977 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1978 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1980 else
1982 var = make_ssa_name (vec_dest, new_stmt);
1983 gimple_call_set_lhs (new_stmt, var);
1986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1988 if (modifier == NARROW)
1990 if ((j & 1) == 0)
1992 prev_res = var;
1993 continue;
1995 var = permute_vec_elements (prev_res, var,
1996 perm_mask, stmt, gsi);
1997 new_stmt = SSA_NAME_DEF_STMT (var);
2000 if (prev_stmt_info == NULL)
2001 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2002 else
2003 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2004 prev_stmt_info = vinfo_for_stmt (new_stmt);
2007 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2008 from the IL. */
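/* Note the scalar call is not deleted outright: its lhs gets a harmless
   zero definition and the stmt_vec_info is moved over to the replacement,
   keeping the SSA web and the vectorizer bookkeeping consistent.  */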
2009 if (STMT_VINFO_RELATED_STMT (stmt_info))
2011 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2012 stmt_info = vinfo_for_stmt (stmt);
2014 tree lhs = gimple_call_lhs (stmt);
2015 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2016 set_vinfo_for_stmt (new_stmt, stmt_info);
2017 set_vinfo_for_stmt (stmt, NULL);
2018 STMT_VINFO_STMT (stmt_info) = new_stmt;
2019 gsi_replace (gsi, new_stmt, true);
2020 return true;
2022 else if (is_store)
2024 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2025 prev_stmt_info = NULL;
2026 for (i = 0; i < ncopies; i++)
2028 unsigned align, misalign;
2030 if (i == 0)
2032 tree rhs = gimple_call_arg (stmt, 3);
2033 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2034 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2035 /* We should have caught mismatched types earlier. */
2036 gcc_assert (useless_type_conversion_p (vectype,
2037 TREE_TYPE (vec_rhs)));
2038 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2039 NULL_TREE, &dummy, gsi,
2040 &ptr_incr, false, &inv_p);
2041 gcc_assert (!inv_p);
2043 else
2045 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2046 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2047 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2048 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2049 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2050 TYPE_SIZE_UNIT (vectype));
2053 align = TYPE_ALIGN_UNIT (vectype);
2054 if (aligned_access_p (dr))
2055 misalign = 0;
2056 else if (DR_MISALIGNMENT (dr) == -1)
2058 align = TYPE_ALIGN_UNIT (elem_type);
2059 misalign = 0;
2061 else
2062 misalign = DR_MISALIGNMENT (dr);
2063 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2064 misalign);
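/* The second argument of the IFN_MASK_STORE call below records the
   alignment that may be assumed for the access, in bytes: ALIGN when
   MISALIGN is zero, otherwise the largest power of two that divides
   MISALIGN.  */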
2065 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2066 misalign ? misalign & -misalign : align);
2067 new_stmt
2068 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2069 ptr, vec_mask, vec_rhs);
2070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2071 if (i == 0)
2072 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2073 else
2074 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2075 prev_stmt_info = vinfo_for_stmt (new_stmt);
2078 else
2080 tree vec_mask = NULL_TREE;
2081 prev_stmt_info = NULL;
2082 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2083 for (i = 0; i < ncopies; i++)
2085 unsigned align, misalign;
2087 if (i == 0)
2089 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2090 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2091 NULL_TREE, &dummy, gsi,
2092 &ptr_incr, false, &inv_p);
2093 gcc_assert (!inv_p);
2095 else
2097 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2098 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2099 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2100 TYPE_SIZE_UNIT (vectype));
2103 align = TYPE_ALIGN_UNIT (vectype);
2104 if (aligned_access_p (dr))
2105 misalign = 0;
2106 else if (DR_MISALIGNMENT (dr) == -1)
2108 align = TYPE_ALIGN_UNIT (elem_type);
2109 misalign = 0;
2111 else
2112 misalign = DR_MISALIGNMENT (dr);
2113 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2114 misalign);
2115 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2116 misalign ? misalign & -misalign : align);
2117 new_stmt
2118 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2119 ptr, vec_mask);
2120 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122 if (i == 0)
2123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124 else
2125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126 prev_stmt_info = vinfo_for_stmt (new_stmt);
2130 if (!is_store)
2132 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2133 from the IL. */
2134 if (STMT_VINFO_RELATED_STMT (stmt_info))
2136 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2137 stmt_info = vinfo_for_stmt (stmt);
2139 tree lhs = gimple_call_lhs (stmt);
2140 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2141 set_vinfo_for_stmt (new_stmt, stmt_info);
2142 set_vinfo_for_stmt (stmt, NULL);
2143 STMT_VINFO_STMT (stmt_info) = new_stmt;
2144 gsi_replace (gsi, new_stmt, true);
2147 return true;
2150 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2151 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2152 in a single step. On success, store the binary pack code in
2153 *CONVERT_CODE. */
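/* For instance, narrowing a vector of ints to a vector of shorts can
   typically be done with a single VEC_PACK_TRUNC_EXPR, and that code is
   what ends up in *CONVERT_CODE; a conversion that would need an
   intermediate type (MULTI_STEP_CVT != 0) is rejected.  */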
2155 static bool
2156 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2157 tree_code *convert_code)
2159 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2160 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2161 return false;
2163 tree_code code;
2164 int multi_step_cvt = 0;
2165 auto_vec <tree, 8> interm_types;
2166 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2167 &code, &multi_step_cvt,
2168 &interm_types)
2169 || multi_step_cvt)
2170 return false;
2172 *convert_code = code;
2173 return true;
2176 /* Function vectorizable_call.
2178 Check if GS performs a function call that can be vectorized.
2179 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2180 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2181 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
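/* For instance, a scalar call such as _1 = sqrtf (_2) may be replaced by a
   single call on vector operands, either through an internal function
   (e.g. IFN_SQRT) or through a target builtin obtained from
   targetm.vectorize.builtin_vectorized_function.  */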
2183 static bool
2184 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2185 slp_tree slp_node)
2187 gcall *stmt;
2188 tree vec_dest;
2189 tree scalar_dest;
2190 tree op, type;
2191 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2192 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2193 tree vectype_out, vectype_in;
2194 int nunits_in;
2195 int nunits_out;
2196 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2197 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2198 vec_info *vinfo = stmt_info->vinfo;
2199 tree fndecl, new_temp, rhs_type;
2200 gimple *def_stmt;
2201 enum vect_def_type dt[3]
2202 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2203 gimple *new_stmt = NULL;
2204 int ncopies, j;
2205 vec<tree> vargs = vNULL;
2206 enum { NARROW, NONE, WIDEN } modifier;
2207 size_t i, nargs;
2208 tree lhs;
2210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2211 return false;
2213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2214 && ! vec_stmt)
2215 return false;
2217 /* Is GS a vectorizable call? */
2218 stmt = dyn_cast <gcall *> (gs);
2219 if (!stmt)
2220 return false;
2222 if (gimple_call_internal_p (stmt)
2223 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2224 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2225 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2226 slp_node);
2228 if (gimple_call_lhs (stmt) == NULL_TREE
2229 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2230 return false;
2232 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2234 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2236 /* Process function arguments. */
2237 rhs_type = NULL_TREE;
2238 vectype_in = NULL_TREE;
2239 nargs = gimple_call_num_args (stmt);
2241 /* Bail out if the function has more than three arguments; we do not have
2242 interesting builtin functions to vectorize with more than two arguments,
2243 except for fma. Zero arguments is not supported either. */
2244 if (nargs == 0 || nargs > 3)
2245 return false;
2247 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2248 if (gimple_call_internal_p (stmt)
2249 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2251 nargs = 0;
2252 rhs_type = unsigned_type_node;
2255 for (i = 0; i < nargs; i++)
2257 tree opvectype;
2259 op = gimple_call_arg (stmt, i);
2261 /* We can only handle calls with arguments of the same type. */
2262 if (rhs_type
2263 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2267 "argument types differ.\n");
2268 return false;
2270 if (!rhs_type)
2271 rhs_type = TREE_TYPE (op);
2273 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2275 if (dump_enabled_p ())
2276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2277 "use not simple.\n");
2278 return false;
2281 if (!vectype_in)
2282 vectype_in = opvectype;
2283 else if (opvectype
2284 && opvectype != vectype_in)
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2288 "argument vector types differ.\n");
2289 return false;
2292 /* If all arguments are external or constant defs, use a vector type with
2293 the same size as the output vector type. */
2294 if (!vectype_in)
2295 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2296 if (vec_stmt)
2297 gcc_assert (vectype_in);
2298 if (!vectype_in)
2300 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "no vectype for scalar type ");
2304 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2305 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2308 return false;
2311 /* FORNOW */
2312 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2313 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
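/* Classify the call by comparing the element counts: e.g. nunits_in == 4
   and nunits_out == 8 is a narrowing call (two input vectors are needed per
   result vector), whereas nunits_out == 2 would make it a widening one.  */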
2314 if (nunits_in == nunits_out / 2)
2315 modifier = NARROW;
2316 else if (nunits_out == nunits_in)
2317 modifier = NONE;
2318 else if (nunits_out == nunits_in / 2)
2319 modifier = WIDEN;
2320 else
2321 return false;
2323 /* We only handle functions that do not read or clobber memory. */
2324 if (gimple_vuse (stmt))
2326 if (dump_enabled_p ())
2327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2328 "function reads from or writes to memory.\n");
2329 return false;
2332 /* For now, we only vectorize functions if a target-specific builtin or
2333 internal function is available. TODO -- in some cases, it might be
2334 profitable to insert the calls for pieces of the vector, in order to
2335 be able to vectorize other operations in the loop. */
2336 fndecl = NULL_TREE;
2337 internal_fn ifn = IFN_LAST;
2338 combined_fn cfn = gimple_call_combined_fn (stmt);
2339 tree callee = gimple_call_fndecl (stmt);
2341 /* First try using an internal function. */
2342 tree_code convert_code = ERROR_MARK;
2343 if (cfn != CFN_LAST
2344 && (modifier == NONE
2345 || (modifier == NARROW
2346 && simple_integer_narrowing (vectype_out, vectype_in,
2347 &convert_code))))
2348 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2349 vectype_in);
2351 /* If that fails, try asking for a target-specific built-in function. */
2352 if (ifn == IFN_LAST)
2354 if (cfn != CFN_LAST)
2355 fndecl = targetm.vectorize.builtin_vectorized_function
2356 (cfn, vectype_out, vectype_in);
2357 else
2358 fndecl = targetm.vectorize.builtin_md_vectorized_function
2359 (callee, vectype_out, vectype_in);
2362 if (ifn == IFN_LAST && !fndecl)
2364 if (cfn == CFN_GOMP_SIMD_LANE
2365 && !slp_node
2366 && loop_vinfo
2367 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2368 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2369 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2370 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2372 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2373 { 0, 1, 2, ... vf - 1 } vector. */
2374 gcc_assert (nargs == 0);
2376 else
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "function is not vectorizable.\n");
2381 return false;
2385 if (slp_node || PURE_SLP_STMT (stmt_info))
2386 ncopies = 1;
2387 else if (modifier == NARROW && ifn == IFN_LAST)
2388 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2389 else
2390 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2392 /* Sanity check: make sure that at least one copy of the vectorized stmt
2393 needs to be generated. */
2394 gcc_assert (ncopies >= 1);
2396 if (!vec_stmt) /* transformation not required. */
2398 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2401 "\n");
2402 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2403 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2404 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2405 vec_promote_demote, stmt_info, 0, vect_body);
2407 return true;
2410 /** Transform. **/
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2415 /* Handle def. */
2416 scalar_dest = gimple_call_lhs (stmt);
2417 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2419 prev_stmt_info = NULL;
2420 if (modifier == NONE || ifn != IFN_LAST)
2422 tree prev_res = NULL_TREE;
2423 for (j = 0; j < ncopies; ++j)
2425 /* Build argument list for the vectorized call. */
2426 if (j == 0)
2427 vargs.create (nargs);
2428 else
2429 vargs.truncate (0);
2431 if (slp_node)
2433 auto_vec<vec<tree> > vec_defs (nargs);
2434 vec<tree> vec_oprnds0;
2436 for (i = 0; i < nargs; i++)
2437 vargs.quick_push (gimple_call_arg (stmt, i));
2438 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2439 vec_oprnds0 = vec_defs[0];
2441 /* Arguments are ready. Create the new vector stmt. */
2442 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2444 size_t k;
2445 for (k = 0; k < nargs; k++)
2447 vec<tree> vec_oprndsk = vec_defs[k];
2448 vargs[k] = vec_oprndsk[i];
2450 if (modifier == NARROW)
2452 tree half_res = make_ssa_name (vectype_in);
2453 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2454 gimple_call_set_lhs (new_stmt, half_res);
2455 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2456 if ((i & 1) == 0)
2458 prev_res = half_res;
2459 continue;
2461 new_temp = make_ssa_name (vec_dest);
2462 new_stmt = gimple_build_assign (new_temp, convert_code,
2463 prev_res, half_res);
2465 else
2467 if (ifn != IFN_LAST)
2468 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2469 else
2470 new_stmt = gimple_build_call_vec (fndecl, vargs);
2471 new_temp = make_ssa_name (vec_dest, new_stmt);
2472 gimple_call_set_lhs (new_stmt, new_temp);
2474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2475 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2478 for (i = 0; i < nargs; i++)
2480 vec<tree> vec_oprndsi = vec_defs[i];
2481 vec_oprndsi.release ();
2483 continue;
2486 for (i = 0; i < nargs; i++)
2488 op = gimple_call_arg (stmt, i);
2489 if (j == 0)
2490 vec_oprnd0
2491 = vect_get_vec_def_for_operand (op, stmt);
2492 else
2494 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2495 vec_oprnd0
2496 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2499 vargs.quick_push (vec_oprnd0);
2502 if (gimple_call_internal_p (stmt)
2503 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2505 tree *v = XALLOCAVEC (tree, nunits_out);
2506 int k;
2507 for (k = 0; k < nunits_out; ++k)
2508 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2509 tree cst = build_vector (vectype_out, v);
2510 tree new_var
2511 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2512 gimple *init_stmt = gimple_build_assign (new_var, cst);
2513 vect_init_vector_1 (stmt, init_stmt, NULL);
2514 new_temp = make_ssa_name (vec_dest);
2515 new_stmt = gimple_build_assign (new_temp, new_var);
2517 else if (modifier == NARROW)
2519 tree half_res = make_ssa_name (vectype_in);
2520 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2521 gimple_call_set_lhs (new_stmt, half_res);
2522 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2523 if ((j & 1) == 0)
2525 prev_res = half_res;
2526 continue;
2528 new_temp = make_ssa_name (vec_dest);
2529 new_stmt = gimple_build_assign (new_temp, convert_code,
2530 prev_res, half_res);
2532 else
2534 if (ifn != IFN_LAST)
2535 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2536 else
2537 new_stmt = gimple_build_call_vec (fndecl, vargs);
2538 new_temp = make_ssa_name (vec_dest, new_stmt);
2539 gimple_call_set_lhs (new_stmt, new_temp);
2541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2543 if (j == (modifier == NARROW ? 1 : 0))
2544 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2545 else
2546 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2548 prev_stmt_info = vinfo_for_stmt (new_stmt);
2551 else if (modifier == NARROW)
2553 for (j = 0; j < ncopies; ++j)
2555 /* Build argument list for the vectorized call. */
2556 if (j == 0)
2557 vargs.create (nargs * 2);
2558 else
2559 vargs.truncate (0);
2561 if (slp_node)
2563 auto_vec<vec<tree> > vec_defs (nargs);
2564 vec<tree> vec_oprnds0;
2566 for (i = 0; i < nargs; i++)
2567 vargs.quick_push (gimple_call_arg (stmt, i));
2568 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2569 vec_oprnds0 = vec_defs[0];
2571 /* Arguments are ready. Create the new vector stmt. */
2572 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2574 size_t k;
2575 vargs.truncate (0);
2576 for (k = 0; k < nargs; k++)
2578 vec<tree> vec_oprndsk = vec_defs[k];
2579 vargs.quick_push (vec_oprndsk[i]);
2580 vargs.quick_push (vec_oprndsk[i + 1]);
2582 if (ifn != IFN_LAST)
2583 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2584 else
2585 new_stmt = gimple_build_call_vec (fndecl, vargs);
2586 new_temp = make_ssa_name (vec_dest, new_stmt);
2587 gimple_call_set_lhs (new_stmt, new_temp);
2588 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2589 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2592 for (i = 0; i < nargs; i++)
2594 vec<tree> vec_oprndsi = vec_defs[i];
2595 vec_oprndsi.release ();
2597 continue;
2600 for (i = 0; i < nargs; i++)
2602 op = gimple_call_arg (stmt, i);
2603 if (j == 0)
2605 vec_oprnd0
2606 = vect_get_vec_def_for_operand (op, stmt);
2607 vec_oprnd1
2608 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2610 else
2612 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2613 vec_oprnd0
2614 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2615 vec_oprnd1
2616 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2619 vargs.quick_push (vec_oprnd0);
2620 vargs.quick_push (vec_oprnd1);
2623 new_stmt = gimple_build_call_vec (fndecl, vargs);
2624 new_temp = make_ssa_name (vec_dest, new_stmt);
2625 gimple_call_set_lhs (new_stmt, new_temp);
2626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2628 if (j == 0)
2629 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2630 else
2631 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2633 prev_stmt_info = vinfo_for_stmt (new_stmt);
2636 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2638 else
2639 /* No current target implements this case. */
2640 return false;
2642 vargs.release ();
2644 /* The call in STMT might prevent it from being removed in dce.
2645 We however cannot remove it here, due to the way the ssa name
2646 it defines is mapped to the new definition. So just replace
2647 rhs of the statement with something harmless. */
2649 if (slp_node)
2650 return true;
2652 type = TREE_TYPE (scalar_dest);
2653 if (is_pattern_stmt_p (stmt_info))
2654 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2655 else
2656 lhs = gimple_call_lhs (stmt);
2658 if (gimple_call_internal_p (stmt)
2659 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2661 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2662 with vf - 1 rather than 0, i.e. the last iteration of the
2663 vectorized loop. */
2664 imm_use_iterator iter;
2665 use_operand_p use_p;
2666 gimple *use_stmt;
2667 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2669 basic_block use_bb = gimple_bb (use_stmt);
2670 if (use_bb
2671 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2673 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2674 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2675 ncopies * nunits_out - 1));
2676 update_stmt (use_stmt);
2681 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2682 set_vinfo_for_stmt (new_stmt, stmt_info);
2683 set_vinfo_for_stmt (stmt, NULL);
2684 STMT_VINFO_STMT (stmt_info) = new_stmt;
2685 gsi_replace (gsi, new_stmt, false);
2687 return true;
2691 struct simd_call_arg_info
2693 tree vectype;
2694 tree op;
2695 enum vect_def_type dt;
2696 HOST_WIDE_INT linear_step;
2697 unsigned int align;
2698 bool simd_lane_linear;
2701 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2702 is linear within a simd lane (but not within the whole loop), note it
2703 in *ARGINFO. */
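/* For example, for a definition chain like
     _3 = GOMP_SIMD_LANE (simduid_1);
     _4 = (sizetype) _3;
     _5 = _4 * 4;
     p_6 = &a + _5;
   p_6 is linear within a simd lane, and ARGINFO records base &a and
   linear step 4.  */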
2705 static void
2706 vect_simd_lane_linear (tree op, struct loop *loop,
2707 struct simd_call_arg_info *arginfo)
2709 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2711 if (!is_gimple_assign (def_stmt)
2712 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2713 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2714 return;
2716 tree base = gimple_assign_rhs1 (def_stmt);
2717 HOST_WIDE_INT linear_step = 0;
2718 tree v = gimple_assign_rhs2 (def_stmt);
2719 while (TREE_CODE (v) == SSA_NAME)
2721 tree t;
2722 def_stmt = SSA_NAME_DEF_STMT (v);
2723 if (is_gimple_assign (def_stmt))
2724 switch (gimple_assign_rhs_code (def_stmt))
2726 case PLUS_EXPR:
2727 t = gimple_assign_rhs2 (def_stmt);
2728 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2729 return;
2730 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2731 v = gimple_assign_rhs1 (def_stmt);
2732 continue;
2733 case MULT_EXPR:
2734 t = gimple_assign_rhs2 (def_stmt);
2735 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2736 return;
2737 linear_step = tree_to_shwi (t);
2738 v = gimple_assign_rhs1 (def_stmt);
2739 continue;
2740 CASE_CONVERT:
2741 t = gimple_assign_rhs1 (def_stmt);
2742 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2743 || (TYPE_PRECISION (TREE_TYPE (v))
2744 < TYPE_PRECISION (TREE_TYPE (t))))
2745 return;
2746 if (!linear_step)
2747 linear_step = 1;
2748 v = t;
2749 continue;
2750 default:
2751 return;
2753 else if (is_gimple_call (def_stmt)
2754 && gimple_call_internal_p (def_stmt)
2755 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2756 && loop->simduid
2757 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2758 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2759 == loop->simduid))
2761 if (!linear_step)
2762 linear_step = 1;
2763 arginfo->linear_step = linear_step;
2764 arginfo->op = base;
2765 arginfo->simd_lane_linear = true;
2766 return;
2771 /* Function vectorizable_simd_clone_call.
2773 Check if STMT performs a function call that can be vectorized
2774 by calling a simd clone of the function.
2775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
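/* For instance, a call _1 = foo (x_2) for which an OpenMP declare-simd
   clone of foo exists may be replaced by calls to that clone operating on
   whole vectors, with each argument assembled into the vector, uniform or
   linear form the chosen clone expects.  */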
2779 static bool
2780 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2781 gimple **vec_stmt, slp_tree slp_node)
2783 tree vec_dest;
2784 tree scalar_dest;
2785 tree op, type;
2786 tree vec_oprnd0 = NULL_TREE;
2787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2788 tree vectype;
2789 unsigned int nunits;
2790 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2791 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2792 vec_info *vinfo = stmt_info->vinfo;
2793 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2794 tree fndecl, new_temp;
2795 gimple *def_stmt;
2796 gimple *new_stmt = NULL;
2797 int ncopies, j;
2798 vec<simd_call_arg_info> arginfo = vNULL;
2799 vec<tree> vargs = vNULL;
2800 size_t i, nargs;
2801 tree lhs, rtype, ratype;
2802 vec<constructor_elt, va_gc> *ret_ctor_elts;
2804 /* Is STMT a vectorizable call? */
2805 if (!is_gimple_call (stmt))
2806 return false;
2808 fndecl = gimple_call_fndecl (stmt);
2809 if (fndecl == NULL_TREE)
2810 return false;
2812 struct cgraph_node *node = cgraph_node::get (fndecl);
2813 if (node == NULL || node->simd_clones == NULL)
2814 return false;
2816 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2817 return false;
2819 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2820 && ! vec_stmt)
2821 return false;
2823 if (gimple_call_lhs (stmt)
2824 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2825 return false;
2827 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2829 vectype = STMT_VINFO_VECTYPE (stmt_info);
2831 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2832 return false;
2834 /* FORNOW */
2835 if (slp_node || PURE_SLP_STMT (stmt_info))
2836 return false;
2838 /* Process function arguments. */
2839 nargs = gimple_call_num_args (stmt);
2841 /* Bail out if the function has zero arguments. */
2842 if (nargs == 0)
2843 return false;
2845 arginfo.create (nargs);
2847 for (i = 0; i < nargs; i++)
2849 simd_call_arg_info thisarginfo;
2850 affine_iv iv;
2852 thisarginfo.linear_step = 0;
2853 thisarginfo.align = 0;
2854 thisarginfo.op = NULL_TREE;
2855 thisarginfo.simd_lane_linear = false;
2857 op = gimple_call_arg (stmt, i);
2858 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2859 &thisarginfo.vectype)
2860 || thisarginfo.dt == vect_uninitialized_def)
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2864 "use not simple.\n");
2865 arginfo.release ();
2866 return false;
2869 if (thisarginfo.dt == vect_constant_def
2870 || thisarginfo.dt == vect_external_def)
2871 gcc_assert (thisarginfo.vectype == NULL_TREE);
2872 else
2873 gcc_assert (thisarginfo.vectype != NULL_TREE);
2875 /* For linear arguments, the analyze phase should have saved
2876 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2877 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2878 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2880 gcc_assert (vec_stmt);
2881 thisarginfo.linear_step
2882 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2883 thisarginfo.op
2884 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2885 thisarginfo.simd_lane_linear
2886 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2887 == boolean_true_node);
2888 /* If the loop has been peeled for alignment, adjust the recorded base. */
2889 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2890 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2891 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2893 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2894 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2895 tree opt = TREE_TYPE (thisarginfo.op);
2896 bias = fold_convert (TREE_TYPE (step), bias);
2897 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2898 thisarginfo.op
2899 = fold_build2 (POINTER_TYPE_P (opt)
2900 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2901 thisarginfo.op, bias);
2904 else if (!vec_stmt
2905 && thisarginfo.dt != vect_constant_def
2906 && thisarginfo.dt != vect_external_def
2907 && loop_vinfo
2908 && TREE_CODE (op) == SSA_NAME
2909 && simple_iv (loop, loop_containing_stmt (stmt), op,
2910 &iv, false)
2911 && tree_fits_shwi_p (iv.step))
2913 thisarginfo.linear_step = tree_to_shwi (iv.step);
2914 thisarginfo.op = iv.base;
2916 else if ((thisarginfo.dt == vect_constant_def
2917 || thisarginfo.dt == vect_external_def)
2918 && POINTER_TYPE_P (TREE_TYPE (op)))
2919 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2920 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2921 linear too. */
2922 if (POINTER_TYPE_P (TREE_TYPE (op))
2923 && !thisarginfo.linear_step
2924 && !vec_stmt
2925 && thisarginfo.dt != vect_constant_def
2926 && thisarginfo.dt != vect_external_def
2927 && loop_vinfo
2928 && !slp_node
2929 && TREE_CODE (op) == SSA_NAME)
2930 vect_simd_lane_linear (op, loop, &thisarginfo);
2932 arginfo.quick_push (thisarginfo);
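/* Select the simd clone to use.  Clones whose simdlen exceeds the
   vectorization factor or whose argument kinds do not match the actual
   arguments are skipped; among the remaining ones a smaller simdlen and a
   worse targetm.simd_clone.usable rating increase the "badness" score
   (inbranch clones are skipped for now), and the clone with the lowest
   score is chosen.  */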
2935 unsigned int badness = 0;
2936 struct cgraph_node *bestn = NULL;
2937 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2938 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2939 else
2940 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2941 n = n->simdclone->next_clone)
2943 unsigned int this_badness = 0;
2944 if (n->simdclone->simdlen
2945 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2946 || n->simdclone->nargs != nargs)
2947 continue;
2948 if (n->simdclone->simdlen
2949 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2950 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2951 - exact_log2 (n->simdclone->simdlen)) * 1024;
2952 if (n->simdclone->inbranch)
2953 this_badness += 2048;
2954 int target_badness = targetm.simd_clone.usable (n);
2955 if (target_badness < 0)
2956 continue;
2957 this_badness += target_badness * 512;
2958 /* FORNOW: Have to add code to add the mask argument. */
2959 if (n->simdclone->inbranch)
2960 continue;
2961 for (i = 0; i < nargs; i++)
2963 switch (n->simdclone->args[i].arg_type)
2965 case SIMD_CLONE_ARG_TYPE_VECTOR:
2966 if (!useless_type_conversion_p
2967 (n->simdclone->args[i].orig_type,
2968 TREE_TYPE (gimple_call_arg (stmt, i))))
2969 i = -1;
2970 else if (arginfo[i].dt == vect_constant_def
2971 || arginfo[i].dt == vect_external_def
2972 || arginfo[i].linear_step)
2973 this_badness += 64;
2974 break;
2975 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2976 if (arginfo[i].dt != vect_constant_def
2977 && arginfo[i].dt != vect_external_def)
2978 i = -1;
2979 break;
2980 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2981 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2982 if (arginfo[i].dt == vect_constant_def
2983 || arginfo[i].dt == vect_external_def
2984 || (arginfo[i].linear_step
2985 != n->simdclone->args[i].linear_step))
2986 i = -1;
2987 break;
2988 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2989 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2990 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2991 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2992 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2993 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2994 /* FORNOW */
2995 i = -1;
2996 break;
2997 case SIMD_CLONE_ARG_TYPE_MASK:
2998 gcc_unreachable ();
3000 if (i == (size_t) -1)
3001 break;
3002 if (n->simdclone->args[i].alignment > arginfo[i].align)
3004 i = -1;
3005 break;
3007 if (arginfo[i].align)
3008 this_badness += (exact_log2 (arginfo[i].align)
3009 - exact_log2 (n->simdclone->args[i].alignment));
3011 if (i == (size_t) -1)
3012 continue;
3013 if (bestn == NULL || this_badness < badness)
3015 bestn = n;
3016 badness = this_badness;
3020 if (bestn == NULL)
3022 arginfo.release ();
3023 return false;
3026 for (i = 0; i < nargs; i++)
3027 if ((arginfo[i].dt == vect_constant_def
3028 || arginfo[i].dt == vect_external_def)
3029 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3031 arginfo[i].vectype
3032 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3033 i)));
3034 if (arginfo[i].vectype == NULL
3035 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3036 > bestn->simdclone->simdlen))
3038 arginfo.release ();
3039 return false;
3043 fndecl = bestn->decl;
3044 nunits = bestn->simdclone->simdlen;
3045 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
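/* E.g. with a vectorization factor of 8 and a clone simdlen of 4, the
   clone is called twice per vectorized loop iteration (ncopies == 2).  */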
3047 /* If the function isn't const, only allow it in simd loops where the
3048 user has asserted that at least nunits consecutive iterations can be
3049 performed using SIMD instructions. */
3050 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3051 && gimple_vuse (stmt))
3053 arginfo.release ();
3054 return false;
3057 /* Sanity check: make sure that at least one copy of the vectorized stmt
3058 needs to be generated. */
3059 gcc_assert (ncopies >= 1);
3061 if (!vec_stmt) /* transformation not required. */
3063 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3064 for (i = 0; i < nargs; i++)
3065 if (bestn->simdclone->args[i].arg_type
3066 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3068 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3069 + 1);
3070 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3071 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3072 ? size_type_node : TREE_TYPE (arginfo[i].op);
3073 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3074 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3075 tree sll = arginfo[i].simd_lane_linear
3076 ? boolean_true_node : boolean_false_node;
3077 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3079 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3080 if (dump_enabled_p ())
3081 dump_printf_loc (MSG_NOTE, vect_location,
3082 "=== vectorizable_simd_clone_call ===\n");
3083 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3084 arginfo.release ();
3085 return true;
3088 /** Transform. **/
3090 if (dump_enabled_p ())
3091 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3093 /* Handle def. */
3094 scalar_dest = gimple_call_lhs (stmt);
3095 vec_dest = NULL_TREE;
3096 rtype = NULL_TREE;
3097 ratype = NULL_TREE;
3098 if (scalar_dest)
3100 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3101 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3102 if (TREE_CODE (rtype) == ARRAY_TYPE)
3104 ratype = rtype;
3105 rtype = TREE_TYPE (ratype);
3109 prev_stmt_info = NULL;
3110 for (j = 0; j < ncopies; ++j)
3112 /* Build argument list for the vectorized call. */
3113 if (j == 0)
3114 vargs.create (nargs);
3115 else
3116 vargs.truncate (0);
3118 for (i = 0; i < nargs; i++)
3120 unsigned int k, l, m, o;
3121 tree atype;
3122 op = gimple_call_arg (stmt, i);
3123 switch (bestn->simdclone->args[i].arg_type)
3125 case SIMD_CLONE_ARG_TYPE_VECTOR:
3126 atype = bestn->simdclone->args[i].vector_type;
3127 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3128 for (m = j * o; m < (j + 1) * o; m++)
3130 if (TYPE_VECTOR_SUBPARTS (atype)
3131 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3133 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3134 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3135 / TYPE_VECTOR_SUBPARTS (atype));
3136 gcc_assert ((k & (k - 1)) == 0);
3137 if (m == 0)
3138 vec_oprnd0
3139 = vect_get_vec_def_for_operand (op, stmt);
3140 else
3142 vec_oprnd0 = arginfo[i].op;
3143 if ((m & (k - 1)) == 0)
3144 vec_oprnd0
3145 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3146 vec_oprnd0);
3148 arginfo[i].op = vec_oprnd0;
3149 vec_oprnd0
3150 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3151 size_int (prec),
3152 bitsize_int ((m & (k - 1)) * prec));
3153 new_stmt
3154 = gimple_build_assign (make_ssa_name (atype),
3155 vec_oprnd0);
3156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3157 vargs.safe_push (gimple_assign_lhs (new_stmt));
3159 else
3161 k = (TYPE_VECTOR_SUBPARTS (atype)
3162 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3163 gcc_assert ((k & (k - 1)) == 0);
3164 vec<constructor_elt, va_gc> *ctor_elts;
3165 if (k != 1)
3166 vec_alloc (ctor_elts, k);
3167 else
3168 ctor_elts = NULL;
3169 for (l = 0; l < k; l++)
3171 if (m == 0 && l == 0)
3172 vec_oprnd0
3173 = vect_get_vec_def_for_operand (op, stmt);
3174 else
3175 vec_oprnd0
3176 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3177 arginfo[i].op);
3178 arginfo[i].op = vec_oprnd0;
3179 if (k == 1)
3180 break;
3181 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3182 vec_oprnd0);
3184 if (k == 1)
3185 vargs.safe_push (vec_oprnd0);
3186 else
3188 vec_oprnd0 = build_constructor (atype, ctor_elts);
3189 new_stmt
3190 = gimple_build_assign (make_ssa_name (atype),
3191 vec_oprnd0);
3192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3193 vargs.safe_push (gimple_assign_lhs (new_stmt));
3197 break;
3198 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3199 vargs.safe_push (op);
3200 break;
3201 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3202 if (j == 0)
3204 gimple_seq stmts;
3205 arginfo[i].op
3206 = force_gimple_operand (arginfo[i].op, &stmts, true,
3207 NULL_TREE);
3208 if (stmts != NULL)
3210 basic_block new_bb;
3211 edge pe = loop_preheader_edge (loop);
3212 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3213 gcc_assert (!new_bb);
3215 if (arginfo[i].simd_lane_linear)
3217 vargs.safe_push (arginfo[i].op);
3218 break;
3220 tree phi_res = copy_ssa_name (op);
3221 gphi *new_phi = create_phi_node (phi_res, loop->header);
3222 set_vinfo_for_stmt (new_phi,
3223 new_stmt_vec_info (new_phi, loop_vinfo));
3224 add_phi_arg (new_phi, arginfo[i].op,
3225 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3226 enum tree_code code
3227 = POINTER_TYPE_P (TREE_TYPE (op))
3228 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3229 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3230 ? sizetype : TREE_TYPE (op);
3231 widest_int cst
3232 = wi::mul (bestn->simdclone->args[i].linear_step,
3233 ncopies * nunits);
3234 tree tcst = wide_int_to_tree (type, cst);
3235 tree phi_arg = copy_ssa_name (op);
3236 new_stmt
3237 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3238 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3239 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3240 set_vinfo_for_stmt (new_stmt,
3241 new_stmt_vec_info (new_stmt, loop_vinfo));
3242 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3243 UNKNOWN_LOCATION);
3244 arginfo[i].op = phi_res;
3245 vargs.safe_push (phi_res);
3247 else
3249 enum tree_code code
3250 = POINTER_TYPE_P (TREE_TYPE (op))
3251 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3252 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3253 ? sizetype : TREE_TYPE (op);
3254 widest_int cst
3255 = wi::mul (bestn->simdclone->args[i].linear_step,
3256 j * nunits);
3257 tree tcst = wide_int_to_tree (type, cst);
3258 new_temp = make_ssa_name (TREE_TYPE (op));
3259 new_stmt = gimple_build_assign (new_temp, code,
3260 arginfo[i].op, tcst);
3261 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3262 vargs.safe_push (new_temp);
3264 break;
3265 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3266 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3267 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3268 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3269 default:
3270 gcc_unreachable ();
3274 new_stmt = gimple_build_call_vec (fndecl, vargs);
3275 if (vec_dest)
3277 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3278 if (ratype)
3279 new_temp = create_tmp_var (ratype);
3280 else if (TYPE_VECTOR_SUBPARTS (vectype)
3281 == TYPE_VECTOR_SUBPARTS (rtype))
3282 new_temp = make_ssa_name (vec_dest, new_stmt);
3283 else
3284 new_temp = make_ssa_name (rtype, new_stmt);
3285 gimple_call_set_lhs (new_stmt, new_temp);
3287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3289 if (vec_dest)
3291 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3293 unsigned int k, l;
3294 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3295 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3296 gcc_assert ((k & (k - 1)) == 0);
3297 for (l = 0; l < k; l++)
3299 tree t;
3300 if (ratype)
3302 t = build_fold_addr_expr (new_temp);
3303 t = build2 (MEM_REF, vectype, t,
3304 build_int_cst (TREE_TYPE (t),
3305 l * prec / BITS_PER_UNIT));
3307 else
3308 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3309 size_int (prec), bitsize_int (l * prec));
3310 new_stmt
3311 = gimple_build_assign (make_ssa_name (vectype), t);
3312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3313 if (j == 0 && l == 0)
3314 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3315 else
3316 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3318 prev_stmt_info = vinfo_for_stmt (new_stmt);
3321 if (ratype)
3323 tree clobber = build_constructor (ratype, NULL);
3324 TREE_THIS_VOLATILE (clobber) = 1;
3325 new_stmt = gimple_build_assign (new_temp, clobber);
3326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3328 continue;
3330 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3332 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3333 / TYPE_VECTOR_SUBPARTS (rtype));
3334 gcc_assert ((k & (k - 1)) == 0);
3335 if ((j & (k - 1)) == 0)
3336 vec_alloc (ret_ctor_elts, k);
3337 if (ratype)
3339 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3340 for (m = 0; m < o; m++)
3342 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3343 size_int (m), NULL_TREE, NULL_TREE);
3344 new_stmt
3345 = gimple_build_assign (make_ssa_name (rtype), tem);
3346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3347 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3348 gimple_assign_lhs (new_stmt));
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 new_stmt = gimple_build_assign (new_temp, clobber);
3353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3355 else
3356 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3357 if ((j & (k - 1)) != k - 1)
3358 continue;
3359 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3360 new_stmt
3361 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3364 if ((unsigned) j == k - 1)
3365 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3366 else
3367 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3369 prev_stmt_info = vinfo_for_stmt (new_stmt);
3370 continue;
3372 else if (ratype)
3374 tree t = build_fold_addr_expr (new_temp);
3375 t = build2 (MEM_REF, vectype, t,
3376 build_int_cst (TREE_TYPE (t), 0));
3377 new_stmt
3378 = gimple_build_assign (make_ssa_name (vec_dest), t);
3379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3380 tree clobber = build_constructor (ratype, NULL);
3381 TREE_THIS_VOLATILE (clobber) = 1;
3382 vect_finish_stmt_generation (stmt,
3383 gimple_build_assign (new_temp,
3384 clobber), gsi);
3388 if (j == 0)
3389 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3390 else
3391 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3393 prev_stmt_info = vinfo_for_stmt (new_stmt);
3396 vargs.release ();
3398 /* The call in STMT might prevent it from being removed in dce.
3399 We however cannot remove it here, due to the way the ssa name
3400 it defines is mapped to the new definition. So just replace
3401 rhs of the statement with something harmless. */
3403 if (slp_node)
3404 return true;
3406 if (scalar_dest)
3408 type = TREE_TYPE (scalar_dest);
3409 if (is_pattern_stmt_p (stmt_info))
3410 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3411 else
3412 lhs = gimple_call_lhs (stmt);
3413 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3415 else
3416 new_stmt = gimple_build_nop ();
3417 set_vinfo_for_stmt (new_stmt, stmt_info);
3418 set_vinfo_for_stmt (stmt, NULL);
3419 STMT_VINFO_STMT (stmt_info) = new_stmt;
3420 gsi_replace (gsi, new_stmt, true);
3421 unlink_stmt_vdef (stmt);
3423 return true;
3427 /* Function vect_gen_widened_results_half
3429 Create a vector stmt whose code, number of operands, and result
3430 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
3431 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3432 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3433 needs to be created (DECL is a function-decl of a target-builtin).
3434 STMT is the original scalar stmt that we are vectorizing. */
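/* For instance, for a widening multiply of V8HI operands the two codes
   produced by supportable_widening_operation are VEC_WIDEN_MULT_LO_EXPR and
   VEC_WIDEN_MULT_HI_EXPR; this helper is invoked once per code and emits
   one V4SI half of the result.  */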
3436 static gimple *
3437 vect_gen_widened_results_half (enum tree_code code,
3438 tree decl,
3439 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3440 tree vec_dest, gimple_stmt_iterator *gsi,
3441 gimple *stmt)
3443 gimple *new_stmt;
3444 tree new_temp;
3446 /* Generate half of the widened result: */
3447 if (code == CALL_EXPR)
3449 /* Target specific support */
3450 if (op_type == binary_op)
3451 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3452 else
3453 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3454 new_temp = make_ssa_name (vec_dest, new_stmt);
3455 gimple_call_set_lhs (new_stmt, new_temp);
3457 else
3459 /* Generic support */
3460 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3461 if (op_type != binary_op)
3462 vec_oprnd1 = NULL;
3463 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3464 new_temp = make_ssa_name (vec_dest, new_stmt);
3465 gimple_assign_set_lhs (new_stmt, new_temp);
3467 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3469 return new_stmt;
3473 /* Get vectorized definitions for loop-based vectorization. For the first
3474 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3475 scalar operand), and for the rest we get a copy with
3476 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3477 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3478 The vectors are collected into VEC_OPRNDS. */
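/* Each invocation pushes two vector defs and recurses while MULTI_STEP_CVT
   is non-zero, so 2 * (MULTI_STEP_CVT + 1) defs are collected in total.  */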
3480 static void
3481 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3482 vec<tree> *vec_oprnds, int multi_step_cvt)
3484 tree vec_oprnd;
3486 /* Get first vector operand. */
3487 /* All the vector operands except the very first one (that is scalar oprnd)
3488 are stmt copies. */
3489 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3490 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3491 else
3492 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3494 vec_oprnds->quick_push (vec_oprnd);
3496 /* Get second vector operand. */
3497 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3498 vec_oprnds->quick_push (vec_oprnd);
3500 *oprnd = vec_oprnd;
3502 /* For conversion in multiple steps, continue to get operands
3503 recursively. */
3504 if (multi_step_cvt)
3505 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3509 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3510 For multi-step conversions store the resulting vectors and call the function
3511 recursively. */
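/* For example, a two-step demotion from int to char elements first packs
   pairs of V4SI vectors into V8HI vectors and then pairs of those into
   V16QI vectors, using VEC_PACK_TRUNC_EXPR at each level.  */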
3513 static void
3514 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3515 int multi_step_cvt, gimple *stmt,
3516 vec<tree> vec_dsts,
3517 gimple_stmt_iterator *gsi,
3518 slp_tree slp_node, enum tree_code code,
3519 stmt_vec_info *prev_stmt_info)
3521 unsigned int i;
3522 tree vop0, vop1, new_tmp, vec_dest;
3523 gimple *new_stmt;
3524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3526 vec_dest = vec_dsts.pop ();
3528 for (i = 0; i < vec_oprnds->length (); i += 2)
3530 /* Create demotion operation. */
3531 vop0 = (*vec_oprnds)[i];
3532 vop1 = (*vec_oprnds)[i + 1];
3533 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3534 new_tmp = make_ssa_name (vec_dest, new_stmt);
3535 gimple_assign_set_lhs (new_stmt, new_tmp);
3536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3538 if (multi_step_cvt)
3539 /* Store the resulting vector for next recursive call. */
3540 (*vec_oprnds)[i/2] = new_tmp;
3541 else
3543 /* This is the last step of the conversion sequence. Store the
3544 vectors in SLP_NODE or in vector info of the scalar statement
3545 (or in STMT_VINFO_RELATED_STMT chain). */
3546 if (slp_node)
3547 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3548 else
3550 if (!*prev_stmt_info)
3551 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3552 else
3553 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3555 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3560 /* For multi-step demotion operations we first generate demotion operations
3561 from the source type to the intermediate types, and then combine the
3562 results (stored in VEC_OPRNDS) with a demotion operation to the
3563 destination type. */
3564 if (multi_step_cvt)
3566 /* At each level of recursion we have half of the operands we had at the
3567 previous level. */
3568 vec_oprnds->truncate ((i+1)/2);
3569 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3570 stmt, vec_dsts, gsi, slp_node,
3571 VEC_PACK_TRUNC_EXPR,
3572 prev_stmt_info);
3575 vec_dsts.quick_push (vec_dest);
3579 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3580 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3581 the resulting vectors and call the function recursively. */
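/* For instance, promoting V8HI operands to int elements produces two V4SI
   results per input operand, one from CODE1 (the "lo" half) and one from
   CODE2 (the "hi" half), or from calls to DECL1/DECL2 when the target
   provides builtins instead.  */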
3583 static void
3584 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3585 vec<tree> *vec_oprnds1,
3586 gimple *stmt, tree vec_dest,
3587 gimple_stmt_iterator *gsi,
3588 enum tree_code code1,
3589 enum tree_code code2, tree decl1,
3590 tree decl2, int op_type)
3592 int i;
3593 tree vop0, vop1, new_tmp1, new_tmp2;
3594 gimple *new_stmt1, *new_stmt2;
3595 vec<tree> vec_tmp = vNULL;
3597 vec_tmp.create (vec_oprnds0->length () * 2);
3598 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3600 if (op_type == binary_op)
3601 vop1 = (*vec_oprnds1)[i];
3602 else
3603 vop1 = NULL_TREE;
3605 /* Generate the two halves of promotion operation. */
3606 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3607 op_type, vec_dest, gsi, stmt);
3608 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3609 op_type, vec_dest, gsi, stmt);
3610 if (is_gimple_call (new_stmt1))
3612 new_tmp1 = gimple_call_lhs (new_stmt1);
3613 new_tmp2 = gimple_call_lhs (new_stmt2);
3615 else
3617 new_tmp1 = gimple_assign_lhs (new_stmt1);
3618 new_tmp2 = gimple_assign_lhs (new_stmt2);
3621 /* Store the results for the next step. */
3622 vec_tmp.quick_push (new_tmp1);
3623 vec_tmp.quick_push (new_tmp2);
3626 vec_oprnds0->release ();
3627 *vec_oprnds0 = vec_tmp;
3631 /* Check if STMT performs a conversion operation that can be vectorized.
3632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3633 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
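/* For instance, a widening conversion such as short -> int is handled with
   the vector unpack-lo/hi codes (possibly via an intermediate type), a
   narrowing conversion such as int -> short with vector pack-trunc, and a
   same-size conversion such as int <-> float with a single code or builtin
   from supportable_convert_operation.  */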
3636 static bool
3637 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3638 gimple **vec_stmt, slp_tree slp_node)
3640 tree vec_dest;
3641 tree scalar_dest;
3642 tree op0, op1 = NULL_TREE;
3643 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3644 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3646 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3647 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3648 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3649 tree new_temp;
3650 gimple *def_stmt;
3651 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3652 gimple *new_stmt = NULL;
3653 stmt_vec_info prev_stmt_info;
3654 int nunits_in;
3655 int nunits_out;
3656 tree vectype_out, vectype_in;
3657 int ncopies, i, j;
3658 tree lhs_type, rhs_type;
3659 enum { NARROW, NONE, WIDEN } modifier;
3660 vec<tree> vec_oprnds0 = vNULL;
3661 vec<tree> vec_oprnds1 = vNULL;
3662 tree vop0;
3663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3664 vec_info *vinfo = stmt_info->vinfo;
3665 int multi_step_cvt = 0;
3666 vec<tree> vec_dsts = vNULL;
3667 vec<tree> interm_types = vNULL;
3668 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3669 int op_type;
3670 machine_mode rhs_mode;
3671 unsigned short fltsz;
3673 /* Is STMT a vectorizable conversion? */
3675 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3676 return false;
3678 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3679 && ! vec_stmt)
3680 return false;
3682 if (!is_gimple_assign (stmt))
3683 return false;
3685 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3686 return false;
3688 code = gimple_assign_rhs_code (stmt);
3689 if (!CONVERT_EXPR_CODE_P (code)
3690 && code != FIX_TRUNC_EXPR
3691 && code != FLOAT_EXPR
3692 && code != WIDEN_MULT_EXPR
3693 && code != WIDEN_LSHIFT_EXPR)
3694 return false;
3696 op_type = TREE_CODE_LENGTH (code);
3698 /* Check types of lhs and rhs. */
3699 scalar_dest = gimple_assign_lhs (stmt);
3700 lhs_type = TREE_TYPE (scalar_dest);
3701 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3703 op0 = gimple_assign_rhs1 (stmt);
3704 rhs_type = TREE_TYPE (op0);
3706 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3707 && !((INTEGRAL_TYPE_P (lhs_type)
3708 && INTEGRAL_TYPE_P (rhs_type))
3709 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3710 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3711 return false;
3713 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3714 && ((INTEGRAL_TYPE_P (lhs_type)
3715 && (TYPE_PRECISION (lhs_type)
3716 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3717 || (INTEGRAL_TYPE_P (rhs_type)
3718 && (TYPE_PRECISION (rhs_type)
3719 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3723 "type conversion to/from bit-precision unsupported."
3724 "\n");
3725 return false;
3728 /* Check the operands of the operation. */
3729 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3731 if (dump_enabled_p ())
3732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3733 "use not simple.\n");
3734 return false;
3736 if (op_type == binary_op)
3738 bool ok;
3740 op1 = gimple_assign_rhs2 (stmt);
3741 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3742 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3743 OP1. */
3744 if (CONSTANT_CLASS_P (op0))
3745 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3746 else
3747 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3749 if (!ok)
3751 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "use not simple.\n");
3754 return false;
3758 /* If op0 is an external or constant defs use a vector type of
3759 the same size as the output vector type. */
3760 if (!vectype_in)
3761 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3762 if (vec_stmt)
3763 gcc_assert (vectype_in);
3764 if (!vectype_in)
3766 if (dump_enabled_p ())
3768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3769 "no vectype for scalar type ");
3770 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3771 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3774 return false;
3777 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3778 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3780 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3783 "can't convert between boolean and non "
3784 "boolean vectors");
3785 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3786 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3789 return false;
3792 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3793 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3794 if (nunits_in < nunits_out)
3795 modifier = NARROW;
3796 else if (nunits_out == nunits_in)
3797 modifier = NONE;
3798 else
3799 modifier = WIDEN;
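/* For example, with 16-byte vectors a char -> int conversion has
   nunits_in == 16 and nunits_out == 4 and is classified as WIDEN,
   whereas an int -> char conversion is classified as NARROW.  */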
3801 /* Multiple types in SLP are handled by creating the appropriate number of
3802 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3803 case of SLP. */
3804 if (slp_node || PURE_SLP_STMT (stmt_info))
3805 ncopies = 1;
3806 else if (modifier == NARROW)
3807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3808 else
3809 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3811 /* Sanity check: make sure that at least one copy of the vectorized stmt
3812 needs to be generated. */
3813 gcc_assert (ncopies >= 1);
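/* For instance, a WIDEN conversion from V8HI to V4SI with a
   vectorization factor of 16 needs 16 / 8 == 2 copies of the
   vectorized statement.  */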
3815 /* Supportable by target? */
3816 switch (modifier)
3818 case NONE:
3819 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3820 return false;
3821 if (supportable_convert_operation (code, vectype_out, vectype_in,
3822 &decl1, &code1))
3823 break;
3824 /* FALLTHRU */
3825 unsupported:
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 "conversion not supported by target.\n");
3829 return false;
3831 case WIDEN:
3832 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3833 &code1, &code2, &multi_step_cvt,
3834 &interm_types))
3836 /* A binary widening operation can only be supported directly by the
3837 target architecture; it cannot be done as a multi-step conversion. */
3838 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3839 break;
3842 if (code != FLOAT_EXPR
3843 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3844 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3845 goto unsupported;
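/* The target cannot widen the float conversion directly, so try to
   reach LHS_TYPE through an intermediate integer type of successively
   doubled size; e.g. a short -> double FLOAT_EXPR may be done as a
   short -> int widening followed by an int -> double conversion.  */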
3847 rhs_mode = TYPE_MODE (rhs_type);
3848 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3849 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3850 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3851 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3853 cvt_type
3854 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3855 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3856 if (cvt_type == NULL_TREE)
3857 goto unsupported;
3859 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3861 if (!supportable_convert_operation (code, vectype_out,
3862 cvt_type, &decl1, &codecvt1))
3863 goto unsupported;
3865 else if (!supportable_widening_operation (code, stmt, vectype_out,
3866 cvt_type, &codecvt1,
3867 &codecvt2, &multi_step_cvt,
3868 &interm_types))
3869 continue;
3870 else
3871 gcc_assert (multi_step_cvt == 0);
3873 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3874 vectype_in, &code1, &code2,
3875 &multi_step_cvt, &interm_types))
3876 break;
3879 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3880 goto unsupported;
3882 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3883 codecvt2 = ERROR_MARK;
3884 else
3886 multi_step_cvt++;
3887 interm_types.safe_push (cvt_type);
3888 cvt_type = NULL_TREE;
3890 break;
3892 case NARROW:
3893 gcc_assert (op_type == unary_op);
3894 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3895 &code1, &multi_step_cvt,
3896 &interm_types))
3897 break;
3899 if (code != FIX_TRUNC_EXPR
3900 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3901 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3902 goto unsupported;
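/* The narrowing is not supported directly, so try a two-step
   sequence; e.g. a double -> short FIX_TRUNC_EXPR may be done as a
   double -> long long truncation followed by an integer narrowing
   from long long down to short.  */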
3904 rhs_mode = TYPE_MODE (rhs_type);
3905 cvt_type
3906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3907 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3908 if (cvt_type == NULL_TREE)
3909 goto unsupported;
3910 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3911 &decl1, &codecvt1))
3912 goto unsupported;
3913 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3914 &code1, &multi_step_cvt,
3915 &interm_types))
3916 break;
3917 goto unsupported;
3919 default:
3920 gcc_unreachable ();
3923 if (!vec_stmt) /* transformation not required. */
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_NOTE, vect_location,
3927 "=== vectorizable_conversion ===\n");
3928 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3930 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3931 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3933 else if (modifier == NARROW)
3935 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3936 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3938 else
3940 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3941 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3943 interm_types.release ();
3944 return true;
3947 /** Transform. **/
3948 if (dump_enabled_p ())
3949 dump_printf_loc (MSG_NOTE, vect_location,
3950 "transform conversion. ncopies = %d.\n", ncopies);
3952 if (op_type == binary_op)
3954 if (CONSTANT_CLASS_P (op0))
3955 op0 = fold_convert (TREE_TYPE (op1), op0);
3956 else if (CONSTANT_CLASS_P (op1))
3957 op1 = fold_convert (TREE_TYPE (op0), op1);
3960 /* In case of multi-step conversion, we first generate conversion operations
3961 to the intermediate types, and then from those types to the final one.
3962 We create vector destinations for the intermediate type (TYPES) received
3963 from supportable_*_operation, and store them in the correct order
3964 for future use in vect_create_vectorized_*_stmts ().  */
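/* For example, a two-step char -> int widening with a short
   intermediate yields VEC_DSTS == { int destination, short destination };
   the transform loops below walk the vector from index MULTI_STEP_CVT
   down to 0, so the short intermediates are created before the final
   int results.  */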
3965 vec_dsts.create (multi_step_cvt + 1);
3966 vec_dest = vect_create_destination_var (scalar_dest,
3967 (cvt_type && modifier == WIDEN)
3968 ? cvt_type : vectype_out);
3969 vec_dsts.quick_push (vec_dest);
3971 if (multi_step_cvt)
3973 for (i = interm_types.length () - 1;
3974 interm_types.iterate (i, &intermediate_type); i--)
3976 vec_dest = vect_create_destination_var (scalar_dest,
3977 intermediate_type);
3978 vec_dsts.quick_push (vec_dest);
3982 if (cvt_type)
3983 vec_dest = vect_create_destination_var (scalar_dest,
3984 modifier == WIDEN
3985 ? vectype_out : cvt_type);
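/* Each narrowing step packs two source vectors into a single result
   vector, so for NARROW we reserve room for twice as many operand
   defs per copy.  */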
3987 if (!slp_node)
3989 if (modifier == WIDEN)
3991 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3992 if (op_type == binary_op)
3993 vec_oprnds1.create (1);
3995 else if (modifier == NARROW)
3996 vec_oprnds0.create (
3997 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3999 else if (code == WIDEN_LSHIFT_EXPR)
4000 vec_oprnds1.create (slp_node->vec_stmts_size);
4002 last_oprnd = op0;
4003 prev_stmt_info = NULL;
4004 switch (modifier)
4006 case NONE:
4007 for (j = 0; j < ncopies; j++)
4009 if (j == 0)
4010 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4011 -1);
4012 else
4013 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4015 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4017 /* Arguments are ready, create the new vector stmt. */
4018 if (code1 == CALL_EXPR)
4020 new_stmt = gimple_build_call (decl1, 1, vop0);
4021 new_temp = make_ssa_name (vec_dest, new_stmt);
4022 gimple_call_set_lhs (new_stmt, new_temp);
4024 else
4026 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4027 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4028 new_temp = make_ssa_name (vec_dest, new_stmt);
4029 gimple_assign_set_lhs (new_stmt, new_temp);
4032 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4033 if (slp_node)
4034 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4035 else
4037 if (!prev_stmt_info)
4038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4039 else
4040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4041 prev_stmt_info = vinfo_for_stmt (new_stmt);
4045 break;
4047 case WIDEN:
4048 /* In case the vectorization factor (VF) is bigger than the number
4049 of elements that we can fit in a vectype (nunits), we have to
4050 generate more than one vector stmt, i.e. we need to "unroll"
4051 the vector stmt by a factor VF/nunits. */
4052 for (j = 0; j < ncopies; j++)
4054 /* Handle uses. */
4055 if (j == 0)
4057 if (slp_node)
4059 if (code == WIDEN_LSHIFT_EXPR)
4061 unsigned int k;
4063 vec_oprnd1 = op1;
4064 /* Store vec_oprnd1 for every vector stmt to be created
4065 for SLP_NODE. We check during the analysis that all
4066 the shift arguments are the same. */
4067 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4068 vec_oprnds1.quick_push (vec_oprnd1);
4070 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4071 slp_node, -1);
4073 else
4074 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4075 &vec_oprnds1, slp_node, -1);
4077 else
4079 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4080 vec_oprnds0.quick_push (vec_oprnd0);
4081 if (op_type == binary_op)
4083 if (code == WIDEN_LSHIFT_EXPR)
4084 vec_oprnd1 = op1;
4085 else
4086 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4087 vec_oprnds1.quick_push (vec_oprnd1);
4091 else
4093 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4094 vec_oprnds0.truncate (0);
4095 vec_oprnds0.quick_push (vec_oprnd0);
4096 if (op_type == binary_op)
4098 if (code == WIDEN_LSHIFT_EXPR)
4099 vec_oprnd1 = op1;
4100 else
4101 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4102 vec_oprnd1);
4103 vec_oprnds1.truncate (0);
4104 vec_oprnds1.quick_push (vec_oprnd1);
4108 /* Arguments are ready. Create the new vector stmts. */
4109 for (i = multi_step_cvt; i >= 0; i--)
4111 tree this_dest = vec_dsts[i];
4112 enum tree_code c1 = code1, c2 = code2;
4113 if (i == 0 && codecvt2 != ERROR_MARK)
4115 c1 = codecvt1;
4116 c2 = codecvt2;
4118 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4119 &vec_oprnds1,
4120 stmt, this_dest, gsi,
4121 c1, c2, decl1, decl2,
4122 op_type);
4125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4127 if (cvt_type)
4129 if (codecvt1 == CALL_EXPR)
4131 new_stmt = gimple_build_call (decl1, 1, vop0);
4132 new_temp = make_ssa_name (vec_dest, new_stmt);
4133 gimple_call_set_lhs (new_stmt, new_temp);
4135 else
4137 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4138 new_temp = make_ssa_name (vec_dest);
4139 new_stmt = gimple_build_assign (new_temp, codecvt1,
4140 vop0);
4143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4145 else
4146 new_stmt = SSA_NAME_DEF_STMT (vop0);
4148 if (slp_node)
4149 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4150 else
4152 if (!prev_stmt_info)
4153 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4154 else
4155 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4156 prev_stmt_info = vinfo_for_stmt (new_stmt);
4161 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4162 break;
4164 case NARROW:
4165 /* In case the vectorization factor (VF) is bigger than the number
4166 of elements that we can fit in a vectype (nunits), we have to
4167 generate more than one vector stmt, i.e. we need to "unroll"
4168 the vector stmt by a factor VF/nunits. */
4169 for (j = 0; j < ncopies; j++)
4171 /* Handle uses. */
4172 if (slp_node)
4173 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4174 slp_node, -1);
4175 else
4177 vec_oprnds0.truncate (0);
4178 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4179 vect_pow2 (multi_step_cvt) - 1);
4182 /* Arguments are ready. Create the new vector stmts. */
4183 if (cvt_type)
4184 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4186 if (codecvt1 == CALL_EXPR)
4188 new_stmt = gimple_build_call (decl1, 1, vop0);
4189 new_temp = make_ssa_name (vec_dest, new_stmt);
4190 gimple_call_set_lhs (new_stmt, new_temp);
4192 else
4194 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4195 new_temp = make_ssa_name (vec_dest);
4196 new_stmt = gimple_build_assign (new_temp, codecvt1,
4197 vop0);
4200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4201 vec_oprnds0[i] = new_temp;
4204 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4205 stmt, vec_dsts, gsi,
4206 slp_node, code1,
4207 &prev_stmt_info);
4210 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4211 break;
4214 vec_oprnds0.release ();
4215 vec_oprnds1.release ();
4216 vec_dsts.release ();
4217 interm_types.release ();
4219 return true;
4223 /* Function vectorizable_assignment.
4225 Check if STMT performs an assignment (copy) that can be vectorized.
4226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4227 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4228 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4230 static bool
4231 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4232 gimple **vec_stmt, slp_tree slp_node)
4234 tree vec_dest;
4235 tree scalar_dest;
4236 tree op;
4237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4239 tree new_temp;
4240 gimple *def_stmt;
4241 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4242 int ncopies;
4243 int i, j;
4244 vec<tree> vec_oprnds = vNULL;
4245 tree vop;
4246 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4247 vec_info *vinfo = stmt_info->vinfo;
4248 gimple *new_stmt = NULL;
4249 stmt_vec_info prev_stmt_info = NULL;
4250 enum tree_code code;
4251 tree vectype_in;
4253 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4254 return false;
4256 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4257 && ! vec_stmt)
4258 return false;
4260 /* Is vectorizable assignment? */
4261 if (!is_gimple_assign (stmt))
4262 return false;
4264 scalar_dest = gimple_assign_lhs (stmt);
4265 if (TREE_CODE (scalar_dest) != SSA_NAME)
4266 return false;
4268 code = gimple_assign_rhs_code (stmt);
4269 if (gimple_assign_single_p (stmt)
4270 || code == PAREN_EXPR
4271 || CONVERT_EXPR_CODE_P (code))
4272 op = gimple_assign_rhs1 (stmt);
4273 else
4274 return false;
4276 if (code == VIEW_CONVERT_EXPR)
4277 op = TREE_OPERAND (op, 0);
4279 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4280 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4282 /* Multiple types in SLP are handled by creating the appropriate number of
4283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4284 case of SLP. */
4285 if (slp_node || PURE_SLP_STMT (stmt_info))
4286 ncopies = 1;
4287 else
4288 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4290 gcc_assert (ncopies >= 1);
4292 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "use not simple.\n");
4297 return false;
4300 /* We can handle NOP_EXPR conversions that do not change the number
4301 of elements or the vector size. */
4302 if ((CONVERT_EXPR_CODE_P (code)
4303 || code == VIEW_CONVERT_EXPR)
4304 && (!vectype_in
4305 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4306 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4307 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4308 return false;
4310 /* We do not handle bit-precision changes. */
4311 if ((CONVERT_EXPR_CODE_P (code)
4312 || code == VIEW_CONVERT_EXPR)
4313 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4314 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4315 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4316 || ((TYPE_PRECISION (TREE_TYPE (op))
4317 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4318 /* But a conversion that does not change the bit-pattern is ok. */
4319 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4320 > TYPE_PRECISION (TREE_TYPE (op)))
4321 && TYPE_UNSIGNED (TREE_TYPE (op)))
4322 /* Conversion between boolean types of different sizes is
4323 a simple assignment in case their vectypes are same
4324 boolean vectors. */
4325 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4326 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "type conversion to/from bit-precision "
4331 "unsupported.\n");
4332 return false;
4335 if (!vec_stmt) /* transformation not required. */
4337 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_NOTE, vect_location,
4340 "=== vectorizable_assignment ===\n");
4341 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4342 return true;
4345 /** Transform. **/
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4349 /* Handle def. */
4350 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4352 /* Handle use. */
4353 for (j = 0; j < ncopies; j++)
4355 /* Handle uses. */
4356 if (j == 0)
4357 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4358 else
4359 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4361 /* Arguments are ready. Create the new vector stmt. */
4362 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4364 if (CONVERT_EXPR_CODE_P (code)
4365 || code == VIEW_CONVERT_EXPR)
4366 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4367 new_stmt = gimple_build_assign (vec_dest, vop);
4368 new_temp = make_ssa_name (vec_dest, new_stmt);
4369 gimple_assign_set_lhs (new_stmt, new_temp);
4370 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4371 if (slp_node)
4372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4375 if (slp_node)
4376 continue;
4378 if (j == 0)
4379 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4380 else
4381 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4383 prev_stmt_info = vinfo_for_stmt (new_stmt);
4386 vec_oprnds.release ();
4387 return true;
4391 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4392 either as shift by a scalar or by a vector. */
4394 bool
4395 vect_supportable_shift (enum tree_code code, tree scalar_type)
4398 machine_mode vec_mode;
4399 optab optab;
4400 int icode;
4401 tree vectype;
4403 vectype = get_vectype_for_scalar_type (scalar_type);
4404 if (!vectype)
4405 return false;
4407 optab = optab_for_tree_code (code, vectype, optab_scalar);
4408 if (!optab
4409 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4411 optab = optab_for_tree_code (code, vectype, optab_vector);
4412 if (!optab
4413 || (optab_handler (optab, TYPE_MODE (vectype))
4414 == CODE_FOR_nothing))
4415 return false;
4418 vec_mode = TYPE_MODE (vectype);
4419 icode = (int) optab_handler (optab, vec_mode);
4420 if (icode == CODE_FOR_nothing)
4421 return false;
4423 return true;
4427 /* Function vectorizable_shift.
4429 Check if STMT performs a shift operation that can be vectorized.
4430 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4431 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4432 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4434 static bool
4435 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4436 gimple **vec_stmt, slp_tree slp_node)
4438 tree vec_dest;
4439 tree scalar_dest;
4440 tree op0, op1 = NULL;
4441 tree vec_oprnd1 = NULL_TREE;
4442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4443 tree vectype;
4444 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4445 enum tree_code code;
4446 machine_mode vec_mode;
4447 tree new_temp;
4448 optab optab;
4449 int icode;
4450 machine_mode optab_op2_mode;
4451 gimple *def_stmt;
4452 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4453 gimple *new_stmt = NULL;
4454 stmt_vec_info prev_stmt_info;
4455 int nunits_in;
4456 int nunits_out;
4457 tree vectype_out;
4458 tree op1_vectype;
4459 int ncopies;
4460 int j, i;
4461 vec<tree> vec_oprnds0 = vNULL;
4462 vec<tree> vec_oprnds1 = vNULL;
4463 tree vop0, vop1;
4464 unsigned int k;
4465 bool scalar_shift_arg = true;
4466 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4467 vec_info *vinfo = stmt_info->vinfo;
4468 int vf;
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4471 return false;
4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4474 && ! vec_stmt)
4475 return false;
4477 /* Is STMT a vectorizable binary/unary operation? */
4478 if (!is_gimple_assign (stmt))
4479 return false;
4481 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4482 return false;
4484 code = gimple_assign_rhs_code (stmt);
4486 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4487 || code == RROTATE_EXPR))
4488 return false;
4490 scalar_dest = gimple_assign_lhs (stmt);
4491 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4492 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4493 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4495 if (dump_enabled_p ())
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4497 "bit-precision shifts not supported.\n");
4498 return false;
4501 op0 = gimple_assign_rhs1 (stmt);
4502 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4506 "use not simple.\n");
4507 return false;
4509 /* If op0 is an external or constant def use a vector type with
4510 the same size as the output vector type. */
4511 if (!vectype)
4512 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4513 if (vec_stmt)
4514 gcc_assert (vectype);
4515 if (!vectype)
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4519 "no vectype for scalar type\n");
4520 return false;
4523 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4524 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4525 if (nunits_out != nunits_in)
4526 return false;
4528 op1 = gimple_assign_rhs2 (stmt);
4529 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4531 if (dump_enabled_p ())
4532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4533 "use not simple.\n");
4534 return false;
4537 if (loop_vinfo)
4538 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4539 else
4540 vf = 1;
4542 /* Multiple types in SLP are handled by creating the appropriate number of
4543 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4544 case of SLP. */
4545 if (slp_node || PURE_SLP_STMT (stmt_info))
4546 ncopies = 1;
4547 else
4548 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4550 gcc_assert (ncopies >= 1);
4552 /* Determine whether the shift amount is a vector, or scalar. If the
4553 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4555 if ((dt[1] == vect_internal_def
4556 || dt[1] == vect_induction_def)
4557 && !slp_node)
4558 scalar_shift_arg = false;
4559 else if (dt[1] == vect_constant_def
4560 || dt[1] == vect_external_def
4561 || dt[1] == vect_internal_def)
4563 /* In SLP, we need to check whether the shift count is the same in all
4564 the stmts of the node; in loops, a constant or invariant shift count
4565 is always a scalar shift. */
4566 if (slp_node)
4568 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4569 gimple *slpstmt;
4571 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4572 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4573 scalar_shift_arg = false;
4576 else
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "operand mode requires invariant argument.\n");
4581 return false;
4584 /* Vector shifted by vector. */
4585 if (!scalar_shift_arg)
4587 optab = optab_for_tree_code (code, vectype, optab_vector);
4588 if (dump_enabled_p ())
4589 dump_printf_loc (MSG_NOTE, vect_location,
4590 "vector/vector shift/rotate found.\n");
4592 if (!op1_vectype)
4593 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4594 if (op1_vectype == NULL_TREE
4595 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4599 "unusable type for last operand in"
4600 " vector/vector shift/rotate.\n");
4601 return false;
4604 /* See if the machine has a vector shifted by scalar insn and if not
4605 then see if it has a vector shifted by vector insn. */
4606 else
4608 optab = optab_for_tree_code (code, vectype, optab_scalar);
4609 if (optab
4610 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_NOTE, vect_location,
4614 "vector/scalar shift/rotate found.\n");
4616 else
4618 optab = optab_for_tree_code (code, vectype, optab_vector);
4619 if (optab
4620 && (optab_handler (optab, TYPE_MODE (vectype))
4621 != CODE_FOR_nothing))
4623 scalar_shift_arg = false;
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_NOTE, vect_location,
4627 "vector/vector shift/rotate found.\n");
4629 /* Unlike the other binary operators, shifts/rotates have
4630 the rhs being int, instead of the same type as the lhs,
4631 so make sure the scalar is the right type if we are
4632 dealing with vectors of long long/long/short/char. */
4633 if (dt[1] == vect_constant_def)
4634 op1 = fold_convert (TREE_TYPE (vectype), op1);
4635 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4636 TREE_TYPE (op1)))
4638 if (slp_node
4639 && TYPE_MODE (TREE_TYPE (vectype))
4640 != TYPE_MODE (TREE_TYPE (op1)))
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "unusable type for last operand in"
4645 " vector/vector shift/rotate.\n");
4646 return false;
4648 if (vec_stmt && !slp_node)
4650 op1 = fold_convert (TREE_TYPE (vectype), op1);
4651 op1 = vect_init_vector (stmt, op1,
4652 TREE_TYPE (vectype), NULL);
4659 /* Supportable by target? */
4660 if (!optab)
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4664 "no optab.\n");
4665 return false;
4667 vec_mode = TYPE_MODE (vectype);
4668 icode = (int) optab_handler (optab, vec_mode);
4669 if (icode == CODE_FOR_nothing)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "op not supported by target.\n");
4674 /* Check only during analysis. */
4675 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4676 || (vf < vect_min_worthwhile_factor (code)
4677 && !vec_stmt))
4678 return false;
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE, vect_location,
4681 "proceeding using word mode.\n");
4684 /* Worthwhile without SIMD support? Check only during analysis. */
4685 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4686 && vf < vect_min_worthwhile_factor (code)
4687 && !vec_stmt)
4689 if (dump_enabled_p ())
4690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4691 "not worthwhile without SIMD support.\n");
4692 return false;
4695 if (!vec_stmt) /* transformation not required. */
4697 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4698 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_NOTE, vect_location,
4700 "=== vectorizable_shift ===\n");
4701 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4702 return true;
4705 /** Transform. **/
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_NOTE, vect_location,
4709 "transform binary/unary operation.\n");
4711 /* Handle def. */
4712 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4714 prev_stmt_info = NULL;
4715 for (j = 0; j < ncopies; j++)
4717 /* Handle uses. */
4718 if (j == 0)
4720 if (scalar_shift_arg)
4722 /* Vector shl and shr insn patterns can be defined with scalar
4723 operand 2 (shift operand). In this case, use constant or loop
4724 invariant op1 directly, without extending it to vector mode
4725 first. */
4726 optab_op2_mode = insn_data[icode].operand[2].mode;
4727 if (!VECTOR_MODE_P (optab_op2_mode))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_NOTE, vect_location,
4731 "operand 1 using scalar mode.\n");
4732 vec_oprnd1 = op1;
4733 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4734 vec_oprnds1.quick_push (vec_oprnd1);
4735 if (slp_node)
4737 /* Store vec_oprnd1 for every vector stmt to be created
4738 for SLP_NODE. We check during the analysis that all
4739 the shift arguments are the same.
4740 TODO: Allow different constants for different vector
4741 stmts generated for an SLP instance. */
4742 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4743 vec_oprnds1.quick_push (vec_oprnd1);
4748 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4749 (a special case for certain kinds of vector shifts);
4750 operand 1 should be of a vector type (the usual case). */
4751 if (vec_oprnd1)
4752 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4753 slp_node, -1);
4754 else
4755 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4756 slp_node, -1);
4758 else
4759 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4761 /* Arguments are ready. Create the new vector stmt. */
4762 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4764 vop1 = vec_oprnds1[i];
4765 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4766 new_temp = make_ssa_name (vec_dest, new_stmt);
4767 gimple_assign_set_lhs (new_stmt, new_temp);
4768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4769 if (slp_node)
4770 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4773 if (slp_node)
4774 continue;
4776 if (j == 0)
4777 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4778 else
4779 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4780 prev_stmt_info = vinfo_for_stmt (new_stmt);
4783 vec_oprnds0.release ();
4784 vec_oprnds1.release ();
4786 return true;
4790 /* Function vectorizable_operation.
4792 Check if STMT performs a binary, unary or ternary operation that can
4793 be vectorized.
4794 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4795 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4796 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4798 static bool
4799 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4800 gimple **vec_stmt, slp_tree slp_node)
4802 tree vec_dest;
4803 tree scalar_dest;
4804 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4805 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4806 tree vectype;
4807 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4808 enum tree_code code;
4809 machine_mode vec_mode;
4810 tree new_temp;
4811 int op_type;
4812 optab optab;
4813 bool target_support_p;
4814 gimple *def_stmt;
4815 enum vect_def_type dt[3]
4816 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4817 gimple *new_stmt = NULL;
4818 stmt_vec_info prev_stmt_info;
4819 int nunits_in;
4820 int nunits_out;
4821 tree vectype_out;
4822 int ncopies;
4823 int j, i;
4824 vec<tree> vec_oprnds0 = vNULL;
4825 vec<tree> vec_oprnds1 = vNULL;
4826 vec<tree> vec_oprnds2 = vNULL;
4827 tree vop0, vop1, vop2;
4828 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4829 vec_info *vinfo = stmt_info->vinfo;
4830 int vf;
4832 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4833 return false;
4835 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4836 && ! vec_stmt)
4837 return false;
4839 /* Is STMT a vectorizable binary/unary operation? */
4840 if (!is_gimple_assign (stmt))
4841 return false;
4843 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4844 return false;
4846 code = gimple_assign_rhs_code (stmt);
4848 /* For pointer addition, we should use the normal plus for
4849 the vector addition. */
4850 if (code == POINTER_PLUS_EXPR)
4851 code = PLUS_EXPR;
4853 /* Support only unary, binary or ternary operations. */
4854 op_type = TREE_CODE_LENGTH (code);
4855 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4859 "num. args = %d (not unary/binary/ternary op).\n",
4860 op_type);
4861 return false;
4864 scalar_dest = gimple_assign_lhs (stmt);
4865 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4867 /* Most operations cannot handle bit-precision types without extra
4868 truncations. */
4869 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4870 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4871 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4872 /* Exception are bitwise binary operations. */
4873 && code != BIT_IOR_EXPR
4874 && code != BIT_XOR_EXPR
4875 && code != BIT_AND_EXPR)
4877 if (dump_enabled_p ())
4878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4879 "bit-precision arithmetic not supported.\n");
4880 return false;
4883 op0 = gimple_assign_rhs1 (stmt);
4884 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4886 if (dump_enabled_p ())
4887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4888 "use not simple.\n");
4889 return false;
4891 /* If op0 is an external or constant def use a vector type with
4892 the same size as the output vector type. */
4893 if (!vectype)
4895 /* For boolean type we cannot determine vectype by
4896 invariant value (don't know whether it is a vector
4897 of booleans or vector of integers). We use output
4898 vectype because operations on boolean don't change
4899 type. */
4900 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4902 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4904 if (dump_enabled_p ())
4905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4906 "not supported operation on bool value.\n");
4907 return false;
4909 vectype = vectype_out;
4911 else
4912 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4914 if (vec_stmt)
4915 gcc_assert (vectype);
4916 if (!vectype)
4918 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921 "no vectype for scalar type ");
4922 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4923 TREE_TYPE (op0));
4924 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4927 return false;
4930 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4931 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4932 if (nunits_out != nunits_in)
4933 return false;
4935 if (op_type == binary_op || op_type == ternary_op)
4937 op1 = gimple_assign_rhs2 (stmt);
4938 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4942 "use not simple.\n");
4943 return false;
4946 if (op_type == ternary_op)
4948 op2 = gimple_assign_rhs3 (stmt);
4949 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4951 if (dump_enabled_p ())
4952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4953 "use not simple.\n");
4954 return false;
4958 if (loop_vinfo)
4959 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4960 else
4961 vf = 1;
4963 /* Multiple types in SLP are handled by creating the appropriate number of
4964 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4965 case of SLP. */
4966 if (slp_node || PURE_SLP_STMT (stmt_info))
4967 ncopies = 1;
4968 else
4969 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4971 gcc_assert (ncopies >= 1);
4973 /* Shifts are handled in vectorizable_shift (). */
4974 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4975 || code == RROTATE_EXPR)
4976 return false;
4978 /* Supportable by target? */
4980 vec_mode = TYPE_MODE (vectype);
4981 if (code == MULT_HIGHPART_EXPR)
4982 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4983 else
4985 optab = optab_for_tree_code (code, vectype, optab_default);
4986 if (!optab)
4988 if (dump_enabled_p ())
4989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4990 "no optab.\n");
4991 return false;
4993 target_support_p = (optab_handler (optab, vec_mode)
4994 != CODE_FOR_nothing);
4997 if (!target_support_p)
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5001 "op not supported by target.\n");
5002 /* Check only during analysis. */
5003 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5004 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5005 return false;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "proceeding using word mode.\n");
5011 /* Worthwhile without SIMD support? Check only during analysis. */
5012 if (!VECTOR_MODE_P (vec_mode)
5013 && !vec_stmt
5014 && vf < vect_min_worthwhile_factor (code))
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5018 "not worthwhile without SIMD support.\n");
5019 return false;
5022 if (!vec_stmt) /* transformation not required. */
5024 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_NOTE, vect_location,
5027 "=== vectorizable_operation ===\n");
5028 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5029 return true;
5032 /** Transform. **/
5034 if (dump_enabled_p ())
5035 dump_printf_loc (MSG_NOTE, vect_location,
5036 "transform binary/unary operation.\n");
5038 /* Handle def. */
5039 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5041 /* In case the vectorization factor (VF) is bigger than the number
5042 of elements that we can fit in a vectype (nunits), we have to generate
5043 more than one vector stmt, i.e. we need to "unroll" the
5044 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5045 from one copy of the vector stmt to the next, in the field
5046 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5047 stages to find the correct vector defs to be used when vectorizing
5048 stmts that use the defs of the current stmt. The example below
5049 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5050 we need to create 4 vectorized stmts):
5052 before vectorization:
5053 RELATED_STMT VEC_STMT
5054 S1: x = memref - -
5055 S2: z = x + 1 - -
5057 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5058 there):
5059 RELATED_STMT VEC_STMT
5060 VS1_0: vx0 = memref0 VS1_1 -
5061 VS1_1: vx1 = memref1 VS1_2 -
5062 VS1_2: vx2 = memref2 VS1_3 -
5063 VS1_3: vx3 = memref3 - -
5064 S1: x = load - VS1_0
5065 S2: z = x + 1 - -
5067 step2: vectorize stmt S2 (done here):
5068 To vectorize stmt S2 we first need to find the relevant vector
5069 def for the first operand 'x'. This is, as usual, obtained from
5070 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5071 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5072 relevant vector def 'vx0'. Having found 'vx0' we can generate
5073 the vector stmt VS2_0, and as usual, record it in the
5074 STMT_VINFO_VEC_STMT of stmt S2.
5075 When creating the second copy (VS2_1), we obtain the relevant vector
5076 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5077 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5078 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5079 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5080 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5081 chain of stmts and pointers:
5082 RELATED_STMT VEC_STMT
5083 VS1_0: vx0 = memref0 VS1_1 -
5084 VS1_1: vx1 = memref1 VS1_2 -
5085 VS1_2: vx2 = memref2 VS1_3 -
5086 VS1_3: vx3 = memref3 - -
5087 S1: x = load - VS1_0
5088 VS2_0: vz0 = vx0 + v1 VS2_1 -
5089 VS2_1: vz1 = vx1 + v1 VS2_2 -
5090 VS2_2: vz2 = vx2 + v1 VS2_3 -
5091 VS2_3: vz3 = vx3 + v1 - -
5092 S2: z = x + 1 - VS2_0 */
5094 prev_stmt_info = NULL;
5095 for (j = 0; j < ncopies; j++)
5097 /* Handle uses. */
5098 if (j == 0)
5100 if (op_type == binary_op || op_type == ternary_op)
5101 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5102 slp_node, -1);
5103 else
5104 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5105 slp_node, -1);
5106 if (op_type == ternary_op)
5108 vec_oprnds2.create (1);
5109 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5110 stmt));
5113 else
5115 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5116 if (op_type == ternary_op)
5118 tree vec_oprnd = vec_oprnds2.pop ();
5119 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5120 vec_oprnd));
5124 /* Arguments are ready. Create the new vector stmt. */
5125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5127 vop1 = ((op_type == binary_op || op_type == ternary_op)
5128 ? vec_oprnds1[i] : NULL_TREE);
5129 vop2 = ((op_type == ternary_op)
5130 ? vec_oprnds2[i] : NULL_TREE);
5131 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5132 new_temp = make_ssa_name (vec_dest, new_stmt);
5133 gimple_assign_set_lhs (new_stmt, new_temp);
5134 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5135 if (slp_node)
5136 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5139 if (slp_node)
5140 continue;
5142 if (j == 0)
5143 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5144 else
5145 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5146 prev_stmt_info = vinfo_for_stmt (new_stmt);
5149 vec_oprnds0.release ();
5150 vec_oprnds1.release ();
5151 vec_oprnds2.release ();
5153 return true;
5156 /* A helper function to ensure data reference DR's base alignment
5157 for STMT_INFO. */
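/* For example, a global array declared with only its natural element
   alignment but accessed through 16-byte vector loads/stores has its
   DECL_ALIGN raised to the vector alignment here, so that the accesses
   the vectorizer planned as aligned really are aligned.  */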
5159 static void
5160 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5162 if (!dr->aux)
5163 return;
5165 if (DR_VECT_AUX (dr)->base_misaligned)
5167 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5168 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5170 if (decl_in_symtab_p (base_decl))
5171 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5172 else
5174 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5175 DECL_USER_ALIGN (base_decl) = 1;
5177 DR_VECT_AUX (dr)->base_misaligned = false;
5182 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5183 reversal of the vector elements. If that is impossible to do,
5184 returns NULL. */
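/* For example, for a four-element vector the mask built below is
   { 3, 2, 1, 0 }.  */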
5186 static tree
5187 perm_mask_for_reverse (tree vectype)
5189 int i, nunits;
5190 unsigned char *sel;
5192 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5193 sel = XALLOCAVEC (unsigned char, nunits);
5195 for (i = 0; i < nunits; ++i)
5196 sel[i] = nunits - 1 - i;
5198 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5199 return NULL_TREE;
5200 return vect_gen_perm_mask_checked (vectype, sel);
5203 /* Function vectorizable_store.
5205 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5206 can be vectorized.
5207 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5208 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5211 static bool
5212 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5213 slp_tree slp_node)
5215 tree scalar_dest;
5216 tree data_ref;
5217 tree op;
5218 tree vec_oprnd = NULL_TREE;
5219 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5220 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5221 tree elem_type;
5222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5223 struct loop *loop = NULL;
5224 machine_mode vec_mode;
5225 tree dummy;
5226 enum dr_alignment_support alignment_support_scheme;
5227 gimple *def_stmt;
5228 enum vect_def_type dt;
5229 stmt_vec_info prev_stmt_info = NULL;
5230 tree dataref_ptr = NULL_TREE;
5231 tree dataref_offset = NULL_TREE;
5232 gimple *ptr_incr = NULL;
5233 int ncopies;
5234 int j;
5235 gimple *next_stmt, *first_stmt = NULL;
5236 bool grouped_store = false;
5237 bool store_lanes_p = false;
5238 unsigned int group_size, i;
5239 vec<tree> dr_chain = vNULL;
5240 vec<tree> oprnds = vNULL;
5241 vec<tree> result_chain = vNULL;
5242 bool inv_p;
5243 bool negative = false;
5244 tree offset = NULL_TREE;
5245 vec<tree> vec_oprnds = vNULL;
5246 bool slp = (slp_node != NULL);
5247 unsigned int vec_num;
5248 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5249 vec_info *vinfo = stmt_info->vinfo;
5250 tree aggr_type;
5251 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5252 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5253 int scatter_scale = 1;
5254 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5255 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5256 gimple *new_stmt;
5258 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5259 return false;
5261 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5262 && ! vec_stmt)
5263 return false;
5265 /* Is vectorizable store? */
5267 if (!is_gimple_assign (stmt))
5268 return false;
5270 scalar_dest = gimple_assign_lhs (stmt);
5271 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5272 && is_pattern_stmt_p (stmt_info))
5273 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5274 if (TREE_CODE (scalar_dest) != ARRAY_REF
5275 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5276 && TREE_CODE (scalar_dest) != INDIRECT_REF
5277 && TREE_CODE (scalar_dest) != COMPONENT_REF
5278 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5279 && TREE_CODE (scalar_dest) != REALPART_EXPR
5280 && TREE_CODE (scalar_dest) != MEM_REF)
5281 return false;
5283 gcc_assert (gimple_assign_single_p (stmt));
5285 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5286 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5288 if (loop_vinfo)
5289 loop = LOOP_VINFO_LOOP (loop_vinfo);
5291 /* Multiple types in SLP are handled by creating the appropriate number of
5292 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5293 case of SLP. */
5294 if (slp || PURE_SLP_STMT (stmt_info))
5295 ncopies = 1;
5296 else
5297 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5299 gcc_assert (ncopies >= 1);
5301 /* FORNOW. This restriction should be relaxed. */
5302 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5306 "multiple types in nested loop.\n");
5307 return false;
5310 op = gimple_assign_rhs1 (stmt);
5312 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5314 if (dump_enabled_p ())
5315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5316 "use not simple.\n");
5317 return false;
5320 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5321 return false;
5323 elem_type = TREE_TYPE (vectype);
5324 vec_mode = TYPE_MODE (vectype);
5326 /* FORNOW. In some cases we can vectorize even if the data-type is not
5327 supported (e.g. array initialization with 0). */
5328 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5329 return false;
5331 if (!STMT_VINFO_DATA_REF (stmt_info))
5332 return false;
5334 if (!STMT_VINFO_STRIDED_P (stmt_info))
5336 negative =
5337 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5338 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5339 size_zero_node) < 0;
5340 if (negative && ncopies > 1)
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "multiple types with negative step.\n");
5345 return false;
5347 if (negative)
5349 gcc_assert (!grouped_store);
5350 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5351 if (alignment_support_scheme != dr_aligned
5352 && alignment_support_scheme != dr_unaligned_supported)
5354 if (dump_enabled_p ())
5355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356 "negative step but alignment required.\n");
5357 return false;
5359 if (dt != vect_constant_def
5360 && dt != vect_external_def
5361 && !perm_mask_for_reverse (vectype))
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365 "negative step and reversing not supported.\n");
5366 return false;
5371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5373 grouped_store = true;
5374 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5375 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5376 if (!slp
5377 && !PURE_SLP_STMT (stmt_info)
5378 && !STMT_VINFO_STRIDED_P (stmt_info))
5380 if (vect_store_lanes_supported (vectype, group_size))
5381 store_lanes_p = true;
5382 else if (!vect_grouped_store_supported (vectype, group_size))
5383 return false;
5386 if (STMT_VINFO_STRIDED_P (stmt_info)
5387 && (slp || PURE_SLP_STMT (stmt_info))
5388 && (group_size > nunits
5389 || nunits % group_size != 0))
5391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5392 "unhandled strided group store\n");
5393 return false;
5396 if (first_stmt == stmt)
5398 /* STMT is the leader of the group. Check the operands of all the
5399 stmts of the group. */
5400 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5401 while (next_stmt)
5403 gcc_assert (gimple_assign_single_p (next_stmt));
5404 op = gimple_assign_rhs1 (next_stmt);
5405 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5409 "use not simple.\n");
5410 return false;
5412 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5417 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5419 gimple *def_stmt;
5420 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5421 &scatter_off, &scatter_scale);
5422 gcc_assert (scatter_decl);
5423 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5424 &scatter_off_vectype))
5426 if (dump_enabled_p ())
5427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5428 "scatter index use not simple.");
5429 return false;
5433 if (!vec_stmt) /* transformation not required. */
5435 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5436 /* The SLP costs are calculated during SLP analysis. */
5437 if (!PURE_SLP_STMT (stmt_info))
5438 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5439 NULL, NULL, NULL);
5440 return true;
5443 /** Transform. **/
5445 ensure_base_align (stmt_info, dr);
5447 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5449 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5450 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5451 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5452 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5453 edge pe = loop_preheader_edge (loop);
5454 gimple_seq seq;
5455 basic_block new_bb;
5456 enum { NARROW, NONE, WIDEN } modifier;
5457 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5459 if (nunits == (unsigned int) scatter_off_nunits)
5460 modifier = NONE;
5461 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5463 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5464 modifier = WIDEN;
5466 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5467 sel[i] = i | nunits;
5469 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5470 gcc_assert (perm_mask != NULL_TREE);
5472 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5474 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5475 modifier = NARROW;
5477 for (i = 0; i < (unsigned int) nunits; ++i)
5478 sel[i] = i | scatter_off_nunits;
5480 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5481 gcc_assert (perm_mask != NULL_TREE);
5482 ncopies *= 2;
5484 else
5485 gcc_unreachable ();
5487 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5488 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5489 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5490 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5491 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5492 scaletype = TREE_VALUE (arglist);
5494 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5495 && TREE_CODE (rettype) == VOID_TYPE);
5497 ptr = fold_convert (ptrtype, scatter_base);
5498 if (!is_gimple_min_invariant (ptr))
5500 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5501 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5502 gcc_assert (!new_bb);
5505 /* Currently we support only unconditional scatter stores,
5506 so mask should be all ones. */
5507 mask = build_int_cst (masktype, -1);
5508 mask = vect_init_vector (stmt, mask, masktype, NULL);
5510 scale = build_int_cst (scaletype, scatter_scale);
5512 prev_stmt_info = NULL;
5513 for (j = 0; j < ncopies; ++j)
5515 if (j == 0)
5517 src = vec_oprnd1
5518 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5519 op = vec_oprnd0
5520 = vect_get_vec_def_for_operand (scatter_off, stmt);
5522 else if (modifier != NONE && (j & 1))
5524 if (modifier == WIDEN)
5526 src = vec_oprnd1
5527 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5528 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5529 stmt, gsi);
5531 else if (modifier == NARROW)
5533 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5534 stmt, gsi);
5535 op = vec_oprnd0
5536 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5538 else
5539 gcc_unreachable ();
5541 else
5543 src = vec_oprnd1
5544 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5545 op = vec_oprnd0
5546 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5549 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5551 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5552 == TYPE_VECTOR_SUBPARTS (srctype));
5553 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5554 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5555 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5557 src = var;
5560 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5562 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5563 == TYPE_VECTOR_SUBPARTS (idxtype));
5564 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5565 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5566 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5568 op = var;
5571 new_stmt
5572 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5576 if (prev_stmt_info == NULL)
5577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5578 else
5579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5580 prev_stmt_info = vinfo_for_stmt (new_stmt);
5582 return true;
5585 if (grouped_store)
5587 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5588 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5590 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5592 /* FORNOW */
5593 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5595 /* We vectorize all the stmts of the interleaving group when we
5596 reach the last stmt in the group. */
5597 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5598 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5599 && !slp)
5601 *vec_stmt = NULL;
5602 return true;
5605 if (slp)
5607 grouped_store = false;
5608 /* VEC_NUM is the number of vect stmts to be created for this
5609 group. */
5610 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5611 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5612 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5613 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5614 op = gimple_assign_rhs1 (first_stmt);
5616 else
5617 /* VEC_NUM is the number of vect stmts to be created for this
5618 group. */
5619 vec_num = group_size;
5621 else
5623 first_stmt = stmt;
5624 first_dr = dr;
5625 group_size = vec_num = 1;
5628 if (dump_enabled_p ())
5629 dump_printf_loc (MSG_NOTE, vect_location,
5630 "transform store. ncopies = %d\n", ncopies);
5632 if (STMT_VINFO_STRIDED_P (stmt_info))
5634 gimple_stmt_iterator incr_gsi;
5635 bool insert_after;
5636 gimple *incr;
5637 tree offvar;
5638 tree ivstep;
5639 tree running_off;
5640 gimple_seq stmts = NULL;
5641 tree stride_base, stride_step, alias_off;
5642 tree vec_oprnd;
5643 unsigned int g;
5645 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5647 stride_base
5648 = fold_build_pointer_plus
5649 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5650 size_binop (PLUS_EXPR,
5651 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5652 convert_to_ptrofftype (DR_INIT (first_dr))));
5653 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5655 /* For a store with loop-invariant (but other than power-of-2)
5656 stride (i.e. not a grouped access) like so:
5658 for (i = 0; i < n; i += stride)
5659 array[i] = ...;
5661 we generate a new induction variable and new stores from
5662 the components of the (vectorized) rhs:
5664 for (j = 0; ; j += VF*stride)
5665 vectemp = ...;
5666 tmp1 = vectemp[0];
5667 array[j] = tmp1;
5668 tmp2 = vectemp[1];
5669 array[j + stride] = tmp2;
5673 unsigned nstores = nunits;
5674 tree ltype = elem_type;
5675 if (slp)
5677 nstores = nunits / group_size;
5678 if (group_size < nunits)
5679 ltype = build_vector_type (elem_type, group_size);
5680 else
5681 ltype = vectype;
5682 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5683 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5684 group_size = 1;
5687 ivstep = stride_step;
5688 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5689 build_int_cst (TREE_TYPE (ivstep),
5690 ncopies * nstores));
5692 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5694 create_iv (stride_base, ivstep, NULL,
5695 loop, &incr_gsi, insert_after,
5696 &offvar, NULL);
5697 incr = gsi_stmt (incr_gsi);
5698 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5700 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5701 if (stmts)
5702 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5704 prev_stmt_info = NULL;
5705 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5706 next_stmt = first_stmt;
5707 for (g = 0; g < group_size; g++)
5709 running_off = offvar;
5710 if (g)
5712 tree size = TYPE_SIZE_UNIT (ltype);
5713 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5714 size);
5715 tree newoff = copy_ssa_name (running_off, NULL);
5716 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5717 running_off, pos);
5718 vect_finish_stmt_generation (stmt, incr, gsi);
5719 running_off = newoff;
5721 for (j = 0; j < ncopies; j++)
5723 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5724 and first_stmt == stmt. */
5725 if (j == 0)
5727 if (slp)
5729 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5730 slp_node, -1);
5731 vec_oprnd = vec_oprnds[0];
5733 else
5735 gcc_assert (gimple_assign_single_p (next_stmt));
5736 op = gimple_assign_rhs1 (next_stmt);
5737 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5740 else
5742 if (slp)
5743 vec_oprnd = vec_oprnds[j];
5744 else
5746 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5747 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5751 for (i = 0; i < nstores; i++)
5753 tree newref, newoff;
5754 gimple *incr, *assign;
5755 tree size = TYPE_SIZE (ltype);
5756 /* Extract the i'th component. */
5757 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5758 bitsize_int (i), size);
5759 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5760 size, pos);
5762 elem = force_gimple_operand_gsi (gsi, elem, true,
5763 NULL_TREE, true,
5764 GSI_SAME_STMT);
5766 newref = build2 (MEM_REF, ltype,
5767 running_off, alias_off);
5769 /* And store it to *running_off. */
5770 assign = gimple_build_assign (newref, elem);
5771 vect_finish_stmt_generation (stmt, assign, gsi);
5773 newoff = copy_ssa_name (running_off, NULL);
5774 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5775 running_off, stride_step);
5776 vect_finish_stmt_generation (stmt, incr, gsi);
5778 running_off = newoff;
5779 if (g == group_size - 1
5780 && !slp)
5782 if (j == 0 && i == 0)
5783 STMT_VINFO_VEC_STMT (stmt_info)
5784 = *vec_stmt = assign;
5785 else
5786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5787 prev_stmt_info = vinfo_for_stmt (assign);
5791 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5793 return true;
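/* Editorial sketch, not part of GCC: a standalone C illustration of what
   the strided-store path above effectively generates, assuming VF == 4,
   ncopies == 1 and no SLP group.  The vec4_sketch type and the compute_rhs
   callback are invented for the illustration only, and the stride is
   counted in elements rather than bytes; in the real transform the lanes
   are extracted with BIT_FIELD_REF and the pointer is the RUNNING_OFF
   induction variable bumped by STRIDE_STEP.

     #include <stddef.h>

     typedef struct { double lane[4]; } vec4_sketch;

     static void
     strided_store_sketch (double *array, size_t n, size_t stride,
                           vec4_sketch (*compute_rhs) (size_t))
     {
       double *running_off = array;                // running_off / offvar
       for (size_t j = 0; j + 4 * stride <= n; j += 4 * stride)
         {
           vec4_sketch vectemp = compute_rhs (j);  // the vectorized rhs
           for (int i = 0; i < 4; i++)             // nstores == nunits == 4
             {
               *running_off = vectemp.lane[i];     // extract lane i, store it
               running_off += stride;              // advance by stride_step
             }
         }
     }  */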
5796 dr_chain.create (group_size);
5797 oprnds.create (group_size);
5799 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5800 gcc_assert (alignment_support_scheme);
5801 /* Targets with store-lane instructions must not require explicit
5802 realignment. */
5803 gcc_assert (!store_lanes_p
5804 || alignment_support_scheme == dr_aligned
5805 || alignment_support_scheme == dr_unaligned_supported);
5807 if (negative)
5808 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5810 if (store_lanes_p)
5811 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5812 else
5813 aggr_type = vectype;
5815 /* In case the vectorization factor (VF) is bigger than the number
5816 of elements that we can fit in a vectype (nunits), we have to generate
5817 more than one vector stmt - i.e., we need to "unroll" the
5818 vector stmt by a factor VF/nunits. For more details see the documentation
5819 of vect_get_vec_def_for_stmt_copy. */
5821 /* In case of interleaving (non-unit grouped access):
5823 S1: &base + 2 = x2
5824 S2: &base = x0
5825 S3: &base + 1 = x1
5826 S4: &base + 3 = x3
5828 We create vectorized stores starting from base address (the access of the
5829 first stmt in the chain (S2 in the above example), when the last store stmt
5830 of the chain (S4) is reached:
5832 VS1: &base = vx2
5833 VS2: &base + vec_size*1 = vx0
5834 VS3: &base + vec_size*2 = vx1
5835 VS4: &base + vec_size*3 = vx3
5837 Then permutation statements are generated:
5839 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5840 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5843 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5844 (the order of the data-refs in the output of vect_permute_store_chain
5845 corresponds to the order of scalar stmts in the interleaving chain - see
5846 the documentation of vect_permute_store_chain()).
5848 In case of both multiple types and interleaving, above vector stores and
5849 permutation stmts are created for every copy. The result vector stmts are
5850 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5851 STMT_VINFO_RELATED_STMT for the next copies.
5854 prev_stmt_info = NULL;
5855 for (j = 0; j < ncopies; j++)
5858 if (j == 0)
5860 if (slp)
5862 /* Get vectorized arguments for SLP_NODE. */
5863 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5864 NULL, slp_node, -1);
5866 vec_oprnd = vec_oprnds[0];
5868 else
5870 /* For interleaved stores we collect vectorized defs for all the
5871 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5872 used as an input to vect_permute_store_chain(), and OPRNDS as
5873 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5875 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5876 OPRNDS are of size 1. */
5877 next_stmt = first_stmt;
5878 for (i = 0; i < group_size; i++)
5880 /* Since gaps are not supported for interleaved stores,
5881 GROUP_SIZE is the exact number of stmts in the chain.
5882 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5883 there is no interleaving, GROUP_SIZE is 1, and only one
5884 iteration of the loop will be executed. */
5885 gcc_assert (next_stmt
5886 && gimple_assign_single_p (next_stmt));
5887 op = gimple_assign_rhs1 (next_stmt);
5889 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5890 dr_chain.quick_push (vec_oprnd);
5891 oprnds.quick_push (vec_oprnd);
5892 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5896 /* We should have caught mismatched types earlier. */
5897 gcc_assert (useless_type_conversion_p (vectype,
5898 TREE_TYPE (vec_oprnd)));
5899 bool simd_lane_access_p
5900 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5901 if (simd_lane_access_p
5902 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5903 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5904 && integer_zerop (DR_OFFSET (first_dr))
5905 && integer_zerop (DR_INIT (first_dr))
5906 && alias_sets_conflict_p (get_alias_set (aggr_type),
5907 get_alias_set (DR_REF (first_dr))))
5909 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5910 dataref_offset = build_int_cst (reference_alias_ptr_type
5911 (DR_REF (first_dr)), 0);
5912 inv_p = false;
5914 else
5915 dataref_ptr
5916 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5917 simd_lane_access_p ? loop : NULL,
5918 offset, &dummy, gsi, &ptr_incr,
5919 simd_lane_access_p, &inv_p);
5920 gcc_assert (bb_vinfo || !inv_p);
5922 else
5924 /* For interleaved stores we created vectorized defs for all the
5925 defs stored in OPRNDS in the previous iteration (previous copy).
5926 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5927 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5928 next copy.
5929 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5930 OPRNDS are of size 1. */
5931 for (i = 0; i < group_size; i++)
5933 op = oprnds[i];
5934 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5935 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5936 dr_chain[i] = vec_oprnd;
5937 oprnds[i] = vec_oprnd;
5939 if (dataref_offset)
5940 dataref_offset
5941 = int_const_binop (PLUS_EXPR, dataref_offset,
5942 TYPE_SIZE_UNIT (aggr_type));
5943 else
5944 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5945 TYPE_SIZE_UNIT (aggr_type));
5948 if (store_lanes_p)
5950 tree vec_array;
5952 /* Combine all the vectors into an array. */
5953 vec_array = create_vector_array (vectype, vec_num);
5954 for (i = 0; i < vec_num; i++)
5956 vec_oprnd = dr_chain[i];
5957 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5960 /* Emit:
5961 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5962 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5963 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5964 gimple_call_set_lhs (new_stmt, data_ref);
5965 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5967 else
5969 new_stmt = NULL;
5970 if (grouped_store)
5972 if (j == 0)
5973 result_chain.create (group_size);
5974 /* Permute. */
5975 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5976 &result_chain);
5979 next_stmt = first_stmt;
5980 for (i = 0; i < vec_num; i++)
5982 unsigned align, misalign;
5984 if (i > 0)
5985 /* Bump the vector pointer. */
5986 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5987 stmt, NULL_TREE);
5989 if (slp)
5990 vec_oprnd = vec_oprnds[i];
5991 else if (grouped_store)
5992 /* For grouped stores vectorized defs are interleaved in
5993 vect_permute_store_chain(). */
5994 vec_oprnd = result_chain[i];
5996 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5997 dataref_ptr,
5998 dataref_offset
5999 ? dataref_offset
6000 : build_int_cst (reference_alias_ptr_type
6001 (DR_REF (first_dr)), 0));
6002 align = TYPE_ALIGN_UNIT (vectype);
6003 if (aligned_access_p (first_dr))
6004 misalign = 0;
6005 else if (DR_MISALIGNMENT (first_dr) == -1)
6007 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6008 align = TYPE_ALIGN_UNIT (elem_type);
6009 else
6010 align = get_object_alignment (DR_REF (first_dr))
6011 / BITS_PER_UNIT;
6012 misalign = 0;
6013 TREE_TYPE (data_ref)
6014 = build_aligned_type (TREE_TYPE (data_ref),
6015 align * BITS_PER_UNIT);
6017 else
6019 TREE_TYPE (data_ref)
6020 = build_aligned_type (TREE_TYPE (data_ref),
6021 TYPE_ALIGN (elem_type));
6022 misalign = DR_MISALIGNMENT (first_dr);
6024 if (dataref_offset == NULL_TREE
6025 && TREE_CODE (dataref_ptr) == SSA_NAME)
6026 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6027 misalign);
6029 if (negative
6030 && dt != vect_constant_def
6031 && dt != vect_external_def)
6033 tree perm_mask = perm_mask_for_reverse (vectype);
6034 tree perm_dest
6035 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6036 vectype);
6037 tree new_temp = make_ssa_name (perm_dest);
6039 /* Generate the permute statement. */
6040 gimple *perm_stmt
6041 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6042 vec_oprnd, perm_mask);
6043 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6045 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6046 vec_oprnd = new_temp;
6049 /* Arguments are ready. Create the new vector stmt. */
6050 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6053 if (slp)
6054 continue;
6056 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6057 if (!next_stmt)
6058 break;
6061 if (!slp)
6063 if (j == 0)
6064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6065 else
6066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6067 prev_stmt_info = vinfo_for_stmt (new_stmt);
6071 dr_chain.release ();
6072 oprnds.release ();
6073 result_chain.release ();
6074 vec_oprnds.release ();
6076 return true;
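/* Editorial sketch, not part of GCC: the effect of the store permutation
   described in the big comment above, written out for the simpler case of
   a group of two interleaved stores (a[2*i] = x; a[2*i+1] = y;) with
   4-lane int vectors.  vect_permute_store_chain would emit two
   VEC_PERM_EXPRs with masks {0, 4, 1, 5} and {2, 6, 3, 7}; the v4si_sketch
   type below is invented for the illustration.

     typedef struct { int lane[4]; } v4si_sketch;

     static void
     interleave_store_sketch (v4si_sketch x, v4si_sketch y,
                              v4si_sketch *lo, v4si_sketch *hi)
     {
       // mask {0, 4, 1, 5}: even result lanes from X, odd ones from Y
       lo->lane[0] = x.lane[0]; lo->lane[1] = y.lane[0];
       lo->lane[2] = x.lane[1]; lo->lane[3] = y.lane[1];
       // mask {2, 6, 3, 7}: the remaining lanes, still alternating X/Y
       hi->lane[0] = x.lane[2]; hi->lane[1] = y.lane[2];
       hi->lane[2] = x.lane[3]; hi->lane[3] = y.lane[3];
     }

   Storing LO and HI contiguously then yields the interleaved memory image
   x0 y0 x1 y1 x2 y2 x3 y3.  */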
6079 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6080 VECTOR_CST mask. No checks are made that the target platform supports the
6081 mask, so callers may wish to test can_vec_perm_p separately, or use
6082 vect_gen_perm_mask_checked. */
6084 tree
6085 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6087 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6088 int i, nunits;
6090 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6092 mask_elt_type = lang_hooks.types.type_for_mode
6093 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6094 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6096 mask_elts = XALLOCAVEC (tree, nunits);
6097 for (i = nunits - 1; i >= 0; i--)
6098 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6099 mask_vec = build_vector (mask_type, mask_elts);
6101 return mask_vec;
6104 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6105 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6107 tree
6108 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6110 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6111 return vect_gen_perm_mask_any (vectype, sel);
6114 /* Given a vector variable X and Y, that was generated for the scalar
6115 STMT, generate instructions to permute the vector elements of X and Y
6116 using permutation mask MASK_VEC, insert them at *GSI and return the
6117 permuted vector variable. */
6119 static tree
6120 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6121 gimple_stmt_iterator *gsi)
6123 tree vectype = TREE_TYPE (x);
6124 tree perm_dest, data_ref;
6125 gimple *perm_stmt;
6127 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6128 data_ref = make_ssa_name (perm_dest);
6130 /* Generate the permute statement. */
6131 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6132 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6134 return data_ref;
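/* Editorial sketch, not part of GCC: the element selection that the
   VEC_PERM_EXPRs built via vect_gen_perm_mask_any and permute_vec_elements
   perform, written as scalar C.  An index below NUNITS picks that lane of
   the first input, an index of NUNITS or more picks lane (index - NUNITS)
   of the second input; X, Y, SEL and RESULT below are plain arrays
   invented for the illustration.

     static void
     vec_perm_sketch (const int *x, const int *y, const unsigned char *sel,
                      int nunits, int *result)
     {
       for (int i = 0; i < nunits; i++)
         result[i] = sel[i] < nunits ? x[sel[i]] : y[sel[i] - nunits];
     }

   With x == y and sel == {3, 2, 1, 0} this reverses a 4-lane vector, which
   is how perm_mask_for_reverse is used for negative-step accesses.  */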
6137 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6138 inserting them on the loop's preheader edge. Returns true if we
6139 were successful in doing so (and thus STMT can then be moved),
6140 otherwise returns false. */
6142 static bool
6143 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6145 ssa_op_iter i;
6146 tree op;
6147 bool any = false;
6149 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6151 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6152 if (!gimple_nop_p (def_stmt)
6153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6155 /* Make sure we don't need to recurse. While we could do
6156 so in simple cases, for more complex use webs
6157 we don't have an easy way to preserve stmt order to fulfil
6158 dependencies within them. */
6159 tree op2;
6160 ssa_op_iter i2;
6161 if (gimple_code (def_stmt) == GIMPLE_PHI)
6162 return false;
6163 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6165 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6166 if (!gimple_nop_p (def_stmt2)
6167 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6168 return false;
6170 any = true;
6174 if (!any)
6175 return true;
6177 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6179 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6180 if (!gimple_nop_p (def_stmt)
6181 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6183 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6184 gsi_remove (&gsi, false);
6185 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6189 return true;
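/* Editorial sketch, not part of GCC: the source-level effect of
   hoist_defs_of_uses combined with the invariant-load handling in
   vectorizable_load below.  A load whose address only depends on
   loop-invariant values can be moved, together with the statements feeding
   its address, to the loop preheader and then broadcast into a vector.
   The function below is invented for the illustration.

     static void
     invariant_load_sketch (int *out, const int *table, int key, int n)
     {
       // before the transform the loop body was: out[i] = table[key & 15];
       int inv = table[key & 15];   // "key & 15" and the load, both hoisted
       for (int i = 0; i < n; i++)
         out[i] = inv;              // later vectorized as a splat of INV
     }  */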
6192 /* vectorizable_load.
6194 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6195 can be vectorized.
6196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6197 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6200 static bool
6201 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6202 slp_tree slp_node, slp_instance slp_node_instance)
6204 tree scalar_dest;
6205 tree vec_dest = NULL;
6206 tree data_ref = NULL;
6207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6208 stmt_vec_info prev_stmt_info;
6209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6210 struct loop *loop = NULL;
6211 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6212 bool nested_in_vect_loop = false;
6213 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6214 tree elem_type;
6215 tree new_temp;
6216 machine_mode mode;
6217 gimple *new_stmt = NULL;
6218 tree dummy;
6219 enum dr_alignment_support alignment_support_scheme;
6220 tree dataref_ptr = NULL_TREE;
6221 tree dataref_offset = NULL_TREE;
6222 gimple *ptr_incr = NULL;
6223 int ncopies;
6224 int i, j, group_size = -1, group_gap_adj;
6225 tree msq = NULL_TREE, lsq;
6226 tree offset = NULL_TREE;
6227 tree byte_offset = NULL_TREE;
6228 tree realignment_token = NULL_TREE;
6229 gphi *phi = NULL;
6230 vec<tree> dr_chain = vNULL;
6231 bool grouped_load = false;
6232 bool load_lanes_p = false;
6233 gimple *first_stmt;
6234 gimple *first_stmt_for_drptr = NULL;
6235 bool inv_p;
6236 bool negative = false;
6237 bool compute_in_loop = false;
6238 struct loop *at_loop;
6239 int vec_num;
6240 bool slp = (slp_node != NULL);
6241 bool slp_perm = false;
6242 enum tree_code code;
6243 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6244 int vf;
6245 tree aggr_type;
6246 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6247 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6248 int gather_scale = 1;
6249 enum vect_def_type gather_dt = vect_unknown_def_type;
6250 vec_info *vinfo = stmt_info->vinfo;
6252 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6253 return false;
6255 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6256 && ! vec_stmt)
6257 return false;
6259 /* Is vectorizable load? */
6260 if (!is_gimple_assign (stmt))
6261 return false;
6263 scalar_dest = gimple_assign_lhs (stmt);
6264 if (TREE_CODE (scalar_dest) != SSA_NAME)
6265 return false;
6267 code = gimple_assign_rhs_code (stmt);
6268 if (code != ARRAY_REF
6269 && code != BIT_FIELD_REF
6270 && code != INDIRECT_REF
6271 && code != COMPONENT_REF
6272 && code != IMAGPART_EXPR
6273 && code != REALPART_EXPR
6274 && code != MEM_REF
6275 && TREE_CODE_CLASS (code) != tcc_declaration)
6276 return false;
6278 if (!STMT_VINFO_DATA_REF (stmt_info))
6279 return false;
6281 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6282 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6284 if (loop_vinfo)
6286 loop = LOOP_VINFO_LOOP (loop_vinfo);
6287 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6288 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6290 else
6291 vf = 1;
6293 /* Multiple types in SLP are handled by creating the appropriate number of
6294 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6295 case of SLP. */
6296 if (slp || PURE_SLP_STMT (stmt_info))
6297 ncopies = 1;
6298 else
6299 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6301 gcc_assert (ncopies >= 1);
6303 /* FORNOW. This restriction should be relaxed. */
6304 if (nested_in_vect_loop && ncopies > 1)
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6308 "multiple types in nested loop.\n");
6309 return false;
6312 /* Invalidate assumptions made by dependence analysis when vectorization
6313 on the unrolled body effectively re-orders stmts. */
6314 if (ncopies > 1
6315 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6316 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6317 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6319 if (dump_enabled_p ())
6320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6321 "cannot perform implicit CSE when unrolling "
6322 "with negative dependence distance\n");
6323 return false;
6326 elem_type = TREE_TYPE (vectype);
6327 mode = TYPE_MODE (vectype);
6329 /* FORNOW. In some cases we can vectorize even if the data-type is not
6330 supported (e.g. data copies). */
6331 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6333 if (dump_enabled_p ())
6334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6335 "Aligned load, but unsupported type.\n");
6336 return false;
6339 /* Check if the load is a part of an interleaving chain. */
6340 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6342 grouped_load = true;
6343 /* FORNOW */
6344 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6346 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6348 /* If this is single-element interleaving with an element distance
6349 that leaves unused vector loads around, punt - we would at least
6350 create very sub-optimal code in that case (and blow up memory,
6351 see PR65518). */
6352 bool force_peeling = false;
6353 if (first_stmt == stmt
6354 && !GROUP_NEXT_ELEMENT (stmt_info))
6356 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6358 if (dump_enabled_p ())
6359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6360 "single-element interleaving not supported "
6361 "for not adjacent vector loads\n");
6362 return false;
6365 /* Single-element interleaving requires peeling for gaps. */
6366 force_peeling = true;
6369 /* If there is a gap in the end of the group or the group size cannot
6370 be made a multiple of the vector element count then we access excess
6371 elements in the last iteration and thus need to peel that off. */
6372 if (loop_vinfo
6373 && ! STMT_VINFO_STRIDED_P (stmt_info)
6374 && (force_peeling
6375 || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6376 || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
6378 if (dump_enabled_p ())
6379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6380 "Data access with gaps requires scalar "
6381 "epilogue loop\n");
6382 if (loop->inner)
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6386 "Peeling for outer loop is not supported\n");
6387 return false;
6390 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6393 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6394 slp_perm = true;
6396 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6397 if (!slp
6398 && !PURE_SLP_STMT (stmt_info)
6399 && !STMT_VINFO_STRIDED_P (stmt_info))
6401 if (vect_load_lanes_supported (vectype, group_size))
6402 load_lanes_p = true;
6403 else if (!vect_grouped_load_supported (vectype, group_size))
6404 return false;
6407 /* Invalidate assumptions made by dependence analysis when vectorization
6408 on the unrolled body effectively re-orders stmts. */
6409 if (!PURE_SLP_STMT (stmt_info)
6410 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6411 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6412 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6414 if (dump_enabled_p ())
6415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6416 "cannot perform implicit CSE when performing "
6417 "group loads with negative dependence distance\n");
6418 return false;
6421 /* Similarly when the stmt is a load that is both part of a SLP
6422 instance and a loop vectorized stmt via the same-dr mechanism
6423 we have to give up. */
6424 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6425 && (STMT_SLP_TYPE (stmt_info)
6426 != STMT_SLP_TYPE (vinfo_for_stmt
6427 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6429 if (dump_enabled_p ())
6430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6431 "conflicting SLP types for CSEd load\n");
6432 return false;
6437 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6439 gimple *def_stmt;
6440 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6441 &gather_off, &gather_scale);
6442 gcc_assert (gather_decl);
6443 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6444 &gather_off_vectype))
6446 if (dump_enabled_p ())
6447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6448 "gather index use not simple.\n");
6449 return false;
6452 else if (STMT_VINFO_STRIDED_P (stmt_info))
6454 if ((grouped_load
6455 && (slp || PURE_SLP_STMT (stmt_info)))
6456 && (group_size > nunits
6457 || nunits % group_size != 0))
6459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6460 "unhandled strided group load\n");
6461 return false;
6464 else
6466 negative = tree_int_cst_compare (nested_in_vect_loop
6467 ? STMT_VINFO_DR_STEP (stmt_info)
6468 : DR_STEP (dr),
6469 size_zero_node) < 0;
6470 if (negative && ncopies > 1)
6472 if (dump_enabled_p ())
6473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6474 "multiple types with negative step.\n");
6475 return false;
6478 if (negative)
6480 if (grouped_load)
6482 if (dump_enabled_p ())
6483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6484 "negative step for group load not supported"
6485 "\n");
6486 return false;
6488 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6489 if (alignment_support_scheme != dr_aligned
6490 && alignment_support_scheme != dr_unaligned_supported)
6492 if (dump_enabled_p ())
6493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6494 "negative step but alignment required.\n");
6495 return false;
6497 if (!perm_mask_for_reverse (vectype))
6499 if (dump_enabled_p ())
6500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6501 "negative step and reversing not supported."
6502 "\n");
6503 return false;
6508 if (!vec_stmt) /* transformation not required. */
6510 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6511 /* The SLP costs are calculated during SLP analysis. */
6512 if (!PURE_SLP_STMT (stmt_info))
6513 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6514 NULL, NULL, NULL);
6515 return true;
6518 if (dump_enabled_p ())
6519 dump_printf_loc (MSG_NOTE, vect_location,
6520 "transform load. ncopies = %d\n", ncopies);
6522 /** Transform. **/
6524 ensure_base_align (stmt_info, dr);
6526 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6528 tree vec_oprnd0 = NULL_TREE, op;
6529 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6530 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6531 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6532 edge pe = loop_preheader_edge (loop);
6533 gimple_seq seq;
6534 basic_block new_bb;
6535 enum { NARROW, NONE, WIDEN } modifier;
6536 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6538 if (nunits == gather_off_nunits)
6539 modifier = NONE;
6540 else if (nunits == gather_off_nunits / 2)
6542 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6543 modifier = WIDEN;
6545 for (i = 0; i < gather_off_nunits; ++i)
6546 sel[i] = i | nunits;
6548 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6550 else if (nunits == gather_off_nunits * 2)
6552 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6553 modifier = NARROW;
6555 for (i = 0; i < nunits; ++i)
6556 sel[i] = i < gather_off_nunits
6557 ? i : i + nunits - gather_off_nunits;
6559 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6560 ncopies *= 2;
6562 else
6563 gcc_unreachable ();
6565 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6566 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6567 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6568 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6569 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6570 scaletype = TREE_VALUE (arglist);
6571 gcc_checking_assert (types_compatible_p (srctype, rettype));
6573 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6575 ptr = fold_convert (ptrtype, gather_base);
6576 if (!is_gimple_min_invariant (ptr))
6578 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6579 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6580 gcc_assert (!new_bb);
6583 /* Currently we support only unconditional gather loads,
6584 so mask should be all ones. */
6585 if (TREE_CODE (masktype) == INTEGER_TYPE)
6586 mask = build_int_cst (masktype, -1);
6587 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6589 mask = build_int_cst (TREE_TYPE (masktype), -1);
6590 mask = build_vector_from_val (masktype, mask);
6591 mask = vect_init_vector (stmt, mask, masktype, NULL);
6593 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6595 REAL_VALUE_TYPE r;
6596 long tmp[6];
6597 for (j = 0; j < 6; ++j)
6598 tmp[j] = -1;
6599 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6600 mask = build_real (TREE_TYPE (masktype), r);
6601 mask = build_vector_from_val (masktype, mask);
6602 mask = vect_init_vector (stmt, mask, masktype, NULL);
6604 else
6605 gcc_unreachable ();
6607 scale = build_int_cst (scaletype, gather_scale);
6609 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6610 merge = build_int_cst (TREE_TYPE (rettype), 0);
6611 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6613 REAL_VALUE_TYPE r;
6614 long tmp[6];
6615 for (j = 0; j < 6; ++j)
6616 tmp[j] = 0;
6617 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6618 merge = build_real (TREE_TYPE (rettype), r);
6620 else
6621 gcc_unreachable ();
6622 merge = build_vector_from_val (rettype, merge);
6623 merge = vect_init_vector (stmt, merge, rettype, NULL);
6625 prev_stmt_info = NULL;
6626 for (j = 0; j < ncopies; ++j)
6628 if (modifier == WIDEN && (j & 1))
6629 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6630 perm_mask, stmt, gsi);
6631 else if (j == 0)
6632 op = vec_oprnd0
6633 = vect_get_vec_def_for_operand (gather_off, stmt);
6634 else
6635 op = vec_oprnd0
6636 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6638 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6640 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6641 == TYPE_VECTOR_SUBPARTS (idxtype));
6642 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6643 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6644 new_stmt
6645 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6646 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6647 op = var;
6650 new_stmt
6651 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6653 if (!useless_type_conversion_p (vectype, rettype))
6655 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6656 == TYPE_VECTOR_SUBPARTS (rettype));
6657 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6658 gimple_call_set_lhs (new_stmt, op);
6659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6660 var = make_ssa_name (vec_dest);
6661 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6662 new_stmt
6663 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6665 else
6667 var = make_ssa_name (vec_dest, new_stmt);
6668 gimple_call_set_lhs (new_stmt, var);
6671 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6673 if (modifier == NARROW)
6675 if ((j & 1) == 0)
6677 prev_res = var;
6678 continue;
6680 var = permute_vec_elements (prev_res, var,
6681 perm_mask, stmt, gsi);
6682 new_stmt = SSA_NAME_DEF_STMT (var);
6685 if (prev_stmt_info == NULL)
6686 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6687 else
6688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6689 prev_stmt_info = vinfo_for_stmt (new_stmt);
6691 return true;
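/* Editorial sketch, not part of GCC: the per-lane semantics of the masked
   gather call built above (gather_decl taking MERGE, PTR, the index
   vector, MASK and SCALE), written as scalar C for four lanes.  Lane I
   loads from BASE + IDX[I] * SCALE bytes when its mask bit is set and
   otherwise keeps the MERGE value; the code above always passes an
   all-ones mask.  All names below are invented for the illustration and
   alignment concerns are ignored.

     static void
     gather_load_sketch (const char *base, const long idx[4], int scale,
                         const double merge[4], const unsigned char mask[4],
                         double result[4])
     {
       for (int i = 0; i < 4; i++)
         result[i] = mask[i]
                     ? *(const double *) (base + idx[i] * scale)
                     : merge[i];   // masked-off lanes keep the merge value
     }  */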
6693 else if (STMT_VINFO_STRIDED_P (stmt_info))
6695 gimple_stmt_iterator incr_gsi;
6696 bool insert_after;
6697 gimple *incr;
6698 tree offvar;
6699 tree ivstep;
6700 tree running_off;
6701 vec<constructor_elt, va_gc> *v = NULL;
6702 gimple_seq stmts = NULL;
6703 tree stride_base, stride_step, alias_off;
6705 gcc_assert (!nested_in_vect_loop);
6707 if (slp && grouped_load)
6708 first_dr = STMT_VINFO_DATA_REF
6709 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6710 else
6711 first_dr = dr;
6713 stride_base
6714 = fold_build_pointer_plus
6715 (DR_BASE_ADDRESS (first_dr),
6716 size_binop (PLUS_EXPR,
6717 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6718 convert_to_ptrofftype (DR_INIT (first_dr))));
6719 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6721 /* For a load with loop-invariant (but other than power-of-2)
6722 stride (i.e. not a grouped access) like so:
6724 for (i = 0; i < n; i += stride)
6725 ... = array[i];
6727 we generate a new induction variable and new accesses to
6728 form a new vector (or vectors, depending on ncopies):
6730 for (j = 0; ; j += VF*stride)
6731 tmp1 = array[j];
6732 tmp2 = array[j + stride];
6734 vectemp = {tmp1, tmp2, ...}
6737 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6738 build_int_cst (TREE_TYPE (stride_step), vf));
6740 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6742 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6743 loop, &incr_gsi, insert_after,
6744 &offvar, NULL);
6745 incr = gsi_stmt (incr_gsi);
6746 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6748 stride_step = force_gimple_operand (unshare_expr (stride_step),
6749 &stmts, true, NULL_TREE);
6750 if (stmts)
6751 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6753 prev_stmt_info = NULL;
6754 running_off = offvar;
6755 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6756 int nloads = nunits;
6757 tree ltype = TREE_TYPE (vectype);
6758 auto_vec<tree> dr_chain;
6759 if (slp)
6761 nloads = nunits / group_size;
6762 if (group_size < nunits)
6763 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6764 else
6765 ltype = vectype;
6766 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6767 /* For SLP permutation support we need to load the whole group,
6768 not only the number of vector stmts the permutation result
6769 fits in. */
6770 if (slp_perm)
6772 ncopies = (group_size * vf + nunits - 1) / nunits;
6773 dr_chain.create (ncopies);
6775 else
6776 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6778 for (j = 0; j < ncopies; j++)
6780 tree vec_inv;
6782 if (nloads > 1)
6784 vec_alloc (v, nloads);
6785 for (i = 0; i < nloads; i++)
6787 tree newref, newoff;
6788 gimple *incr;
6789 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6791 newref = force_gimple_operand_gsi (gsi, newref, true,
6792 NULL_TREE, true,
6793 GSI_SAME_STMT);
6794 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6795 newoff = copy_ssa_name (running_off);
6796 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6797 running_off, stride_step);
6798 vect_finish_stmt_generation (stmt, incr, gsi);
6800 running_off = newoff;
6803 vec_inv = build_constructor (vectype, v);
6804 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6805 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6807 else
6809 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6810 build2 (MEM_REF, ltype,
6811 running_off, alias_off));
6812 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6814 tree newoff = copy_ssa_name (running_off);
6815 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6816 running_off, stride_step);
6817 vect_finish_stmt_generation (stmt, incr, gsi);
6819 running_off = newoff;
6822 if (slp)
6824 if (slp_perm)
6825 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6826 else
6827 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6829 else
6831 if (j == 0)
6832 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6833 else
6834 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6835 prev_stmt_info = vinfo_for_stmt (new_stmt);
6838 if (slp_perm)
6839 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6840 slp_node_instance, false);
6841 return true;
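/* Editorial sketch, not part of GCC: what the strided-load path above
   effectively builds for "... = array[i]" with i += stride, assuming
   VF == 4 and no SLP group: NLOADS scalar loads through a pointer bumped
   by the stride are collected into a CONSTRUCTOR that initializes the
   vector.  The v4df_sketch type is invented for the illustration and the
   stride is counted in elements rather than bytes.

     #include <stddef.h>

     typedef struct { double lane[4]; } v4df_sketch;

     static v4df_sketch
     strided_load_sketch (const double *array, size_t j, size_t stride)
     {
       v4df_sketch vectemp;
       const double *running_off = array + j;   // running_off / offvar
       for (int i = 0; i < 4; i++)              // nloads == nunits == 4
         {
           vectemp.lane[i] = *running_off;      // element of the CONSTRUCTOR
           running_off += stride;               // advance by stride_step
         }
       return vectemp;                          // vectemp = {tmp1, tmp2, ...}
     }  */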
6844 if (grouped_load)
6846 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6847 /* For SLP vectorization we directly vectorize a subchain
6848 without permutation. */
6849 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6850 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6851 /* For BB vectorization always use the first stmt to base
6852 the data ref pointer on. */
6853 if (bb_vinfo)
6854 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6856 /* Check if the chain of loads is already vectorized. */
6857 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6858 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6859 ??? But we can only do so if there is exactly one,
6860 as we have no way to get at the rest. Leave the CSE
6861 opportunity alone.
6862 ??? With the group load eventually participating
6863 in multiple different permutations (having multiple
6864 slp nodes which refer to the same group) the CSE
6865 would even produce wrong code. See PR56270. */
6866 && !slp)
6868 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6869 return true;
6871 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6872 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6873 group_gap_adj = 0;
6875 /* VEC_NUM is the number of vect stmts to be created for this group. */
6876 if (slp)
6878 grouped_load = false;
6879 /* For SLP permutation support we need to load the whole group,
6880 not only the number of vector stmts the permutation result
6881 fits in. */
6882 if (slp_perm)
6883 vec_num = (group_size * vf + nunits - 1) / nunits;
6884 else
6885 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6886 group_gap_adj = vf * group_size - nunits * vec_num;
6888 else
6889 vec_num = group_size;
6891 else
6893 first_stmt = stmt;
6894 first_dr = dr;
6895 group_size = vec_num = 1;
6896 group_gap_adj = 0;
6899 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6900 gcc_assert (alignment_support_scheme);
6901 /* Targets with load-lane instructions must not require explicit
6902 realignment. */
6903 gcc_assert (!load_lanes_p
6904 || alignment_support_scheme == dr_aligned
6905 || alignment_support_scheme == dr_unaligned_supported);
6907 /* In case the vectorization factor (VF) is bigger than the number
6908 of elements that we can fit in a vectype (nunits), we have to generate
6909 more than one vector stmt - i.e., we need to "unroll" the
6910 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6911 from one copy of the vector stmt to the next, in the field
6912 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6913 stages to find the correct vector defs to be used when vectorizing
6914 stmts that use the defs of the current stmt. The example below
6915 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6916 need to create 4 vectorized stmts):
6918 before vectorization:
6919 RELATED_STMT VEC_STMT
6920 S1: x = memref - -
6921 S2: z = x + 1 - -
6923 step 1: vectorize stmt S1:
6924 We first create the vector stmt VS1_0, and, as usual, record a
6925 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6926 Next, we create the vector stmt VS1_1, and record a pointer to
6927 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6928 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6929 stmts and pointers:
6930 RELATED_STMT VEC_STMT
6931 VS1_0: vx0 = memref0 VS1_1 -
6932 VS1_1: vx1 = memref1 VS1_2 -
6933 VS1_2: vx2 = memref2 VS1_3 -
6934 VS1_3: vx3 = memref3 - -
6935 S1: x = load - VS1_0
6936 S2: z = x + 1 - -
6938 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6939 information we recorded in RELATED_STMT field is used to vectorize
6940 stmt S2. */
6942 /* In case of interleaving (non-unit grouped access):
6944 S1: x2 = &base + 2
6945 S2: x0 = &base
6946 S3: x1 = &base + 1
6947 S4: x3 = &base + 3
6949 Vectorized loads are created in the order of memory accesses
6950 starting from the access of the first stmt of the chain:
6952 VS1: vx0 = &base
6953 VS2: vx1 = &base + vec_size*1
6954 VS3: vx3 = &base + vec_size*2
6955 VS4: vx4 = &base + vec_size*3
6957 Then permutation statements are generated:
6959 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6960 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6963 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6964 (the order of the data-refs in the output of vect_permute_load_chain
6965 corresponds to the order of scalar stmts in the interleaving chain - see
6966 the documentation of vect_permute_load_chain()).
6967 The generation of permutation stmts and recording them in
6968 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6970 In case of both multiple types and interleaving, the vector loads and
6971 permutation stmts above are created for every copy. The result vector
6972 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6973 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
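/* Editorial sketch, not part of GCC: the load-side counterpart of the
   permutation described above, written out for a group of two interleaved
   loads (x = a[2*i]; y = a[2*i+1];) with 4-lane int vectors.  The two
   contiguous vector loads VX0 = {x0,y0,x1,y1} and VX1 = {x2,y2,x3,y3} are
   split back into per-statement vectors with masks {0, 2, 4, 6} and
   {1, 3, 5, 7}; the v4si_load_sketch type is invented for the
   illustration.

     typedef struct { int lane[4]; } v4si_load_sketch;

     static void
     deinterleave_load_sketch (v4si_load_sketch vx0, v4si_load_sketch vx1,
                               v4si_load_sketch *even, v4si_load_sketch *odd)
     {
       // mask {0, 2, 4, 6}: the a[2*i] elements, for the first scalar stmt
       even->lane[0] = vx0.lane[0]; even->lane[1] = vx0.lane[2];
       even->lane[2] = vx1.lane[0]; even->lane[3] = vx1.lane[2];
       // mask {1, 3, 5, 7}: the a[2*i+1] elements, for the second stmt
       odd->lane[0] = vx0.lane[1]; odd->lane[1] = vx0.lane[3];
       odd->lane[2] = vx1.lane[1]; odd->lane[3] = vx1.lane[3];
     }  */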
6975 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6976 on a target that supports unaligned accesses (dr_unaligned_supported)
6977 we generate the following code:
6978 p = initial_addr;
6979 indx = 0;
6980 loop {
6981 p = p + indx * vectype_size;
6982 vec_dest = *(p);
6983 indx = indx + 1;
6986 Otherwise, the data reference is potentially unaligned on a target that
6987 does not support unaligned accesses (dr_explicit_realign_optimized) -
6988 then generate the following code, in which the data in each iteration is
6989 obtained by two vector loads, one from the previous iteration, and one
6990 from the current iteration:
6991 p1 = initial_addr;
6992 msq_init = *(floor(p1))
6993 p2 = initial_addr + VS - 1;
6994 realignment_token = call target_builtin;
6995 indx = 0;
6996 loop {
6997 p2 = p2 + indx * vectype_size
6998 lsq = *(floor(p2))
6999 vec_dest = realign_load (msq, lsq, realignment_token)
7000 indx = indx + 1;
7001 msq = lsq;
7002 } */
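/* Editorial sketch, not part of GCC: the core realignment computation used
   by both explicit-realign schemes shown above, written out at the byte
   level for one access and a 16-byte vector.  Two aligned loads, from
   floor (p) and floor (p + VS - 1), are combined so that the result equals
   an unaligned load from P; the target-specific realignment_token and
   REALIGN_LOAD are modelled by a plain byte loop, and all names below are
   invented for the illustration.

     #include <stdint.h>
     #include <string.h>

     #define VS 16   // vector size in bytes, an assumption of the sketch

     static void
     realign_load_sketch (const uint8_t *p, uint8_t result[VS])
     {
       const uint8_t *p1
         = (const uint8_t *) ((uintptr_t) p & -(uintptr_t) VS);
       const uint8_t *p2
         = (const uint8_t *) (((uintptr_t) p + VS - 1) & -(uintptr_t) VS);
       uint8_t msq[VS], lsq[VS];
       memcpy (msq, p1, VS);                  // msq = *(floor (p))
       memcpy (lsq, p2, VS);                  // lsq = *(floor (p + VS - 1))
       unsigned shift = (uintptr_t) p & (VS - 1);
       for (unsigned i = 0; i < VS; i++)      // vec = realign_load (msq, lsq)
         result[i] = i + shift < VS ? msq[i + shift] : lsq[i + shift - VS];
     }  */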
7004 /* If the misalignment remains the same throughout the execution of the
7005 loop, we can create the init_addr and permutation mask at the loop
7006 preheader. Otherwise, they need to be created inside the loop.
7007 This can only occur when vectorizing memory accesses in the inner-loop
7008 nested within an outer-loop that is being vectorized. */
7010 if (nested_in_vect_loop
7011 && (TREE_INT_CST_LOW (DR_STEP (dr))
7012 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7014 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7015 compute_in_loop = true;
7018 if ((alignment_support_scheme == dr_explicit_realign_optimized
7019 || alignment_support_scheme == dr_explicit_realign)
7020 && !compute_in_loop)
7022 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7023 alignment_support_scheme, NULL_TREE,
7024 &at_loop);
7025 if (alignment_support_scheme == dr_explicit_realign_optimized)
7027 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7028 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7029 size_one_node);
7032 else
7033 at_loop = loop;
7035 if (negative)
7036 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7038 if (load_lanes_p)
7039 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7040 else
7041 aggr_type = vectype;
7043 prev_stmt_info = NULL;
7044 for (j = 0; j < ncopies; j++)
7046 /* 1. Create the vector or array pointer update chain. */
7047 if (j == 0)
7049 bool simd_lane_access_p
7050 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7051 if (simd_lane_access_p
7052 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7053 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7054 && integer_zerop (DR_OFFSET (first_dr))
7055 && integer_zerop (DR_INIT (first_dr))
7056 && alias_sets_conflict_p (get_alias_set (aggr_type),
7057 get_alias_set (DR_REF (first_dr)))
7058 && (alignment_support_scheme == dr_aligned
7059 || alignment_support_scheme == dr_unaligned_supported))
7061 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7062 dataref_offset = build_int_cst (reference_alias_ptr_type
7063 (DR_REF (first_dr)), 0);
7064 inv_p = false;
7066 else if (first_stmt_for_drptr
7067 && first_stmt != first_stmt_for_drptr)
7069 dataref_ptr
7070 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7071 at_loop, offset, &dummy, gsi,
7072 &ptr_incr, simd_lane_access_p,
7073 &inv_p, byte_offset);
7074 /* Adjust the pointer by the difference to first_stmt. */
7075 data_reference_p ptrdr
7076 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7077 tree diff = fold_convert (sizetype,
7078 size_binop (MINUS_EXPR,
7079 DR_INIT (first_dr),
7080 DR_INIT (ptrdr)));
7081 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7082 stmt, diff);
7084 else
7085 dataref_ptr
7086 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7087 offset, &dummy, gsi, &ptr_incr,
7088 simd_lane_access_p, &inv_p,
7089 byte_offset);
7091 else if (dataref_offset)
7092 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7093 TYPE_SIZE_UNIT (aggr_type));
7094 else
7095 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7096 TYPE_SIZE_UNIT (aggr_type));
7098 if (grouped_load || slp_perm)
7099 dr_chain.create (vec_num);
7101 if (load_lanes_p)
7103 tree vec_array;
7105 vec_array = create_vector_array (vectype, vec_num);
7107 /* Emit:
7108 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7109 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7110 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7111 gimple_call_set_lhs (new_stmt, vec_array);
7112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7114 /* Extract each vector into an SSA_NAME. */
7115 for (i = 0; i < vec_num; i++)
7117 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7118 vec_array, i);
7119 dr_chain.quick_push (new_temp);
7122 /* Record the mapping between SSA_NAMEs and statements. */
7123 vect_record_grouped_load_vectors (stmt, dr_chain);
7125 else
7127 for (i = 0; i < vec_num; i++)
7129 if (i > 0)
7130 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7131 stmt, NULL_TREE);
7133 /* 2. Create the vector-load in the loop. */
7134 switch (alignment_support_scheme)
7136 case dr_aligned:
7137 case dr_unaligned_supported:
7139 unsigned int align, misalign;
7141 data_ref
7142 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7143 dataref_offset
7144 ? dataref_offset
7145 : build_int_cst (reference_alias_ptr_type
7146 (DR_REF (first_dr)), 0));
7147 align = TYPE_ALIGN_UNIT (vectype);
7148 if (alignment_support_scheme == dr_aligned)
7150 gcc_assert (aligned_access_p (first_dr));
7151 misalign = 0;
7153 else if (DR_MISALIGNMENT (first_dr) == -1)
7155 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7156 align = TYPE_ALIGN_UNIT (elem_type);
7157 else
7158 align = (get_object_alignment (DR_REF (first_dr))
7159 / BITS_PER_UNIT);
7160 misalign = 0;
7161 TREE_TYPE (data_ref)
7162 = build_aligned_type (TREE_TYPE (data_ref),
7163 align * BITS_PER_UNIT);
7165 else
7167 TREE_TYPE (data_ref)
7168 = build_aligned_type (TREE_TYPE (data_ref),
7169 TYPE_ALIGN (elem_type));
7170 misalign = DR_MISALIGNMENT (first_dr);
7172 if (dataref_offset == NULL_TREE
7173 && TREE_CODE (dataref_ptr) == SSA_NAME)
7174 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7175 align, misalign);
7176 break;
7178 case dr_explicit_realign:
7180 tree ptr, bump;
7182 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7184 if (compute_in_loop)
7185 msq = vect_setup_realignment (first_stmt, gsi,
7186 &realignment_token,
7187 dr_explicit_realign,
7188 dataref_ptr, NULL);
7190 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7191 ptr = copy_ssa_name (dataref_ptr);
7192 else
7193 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7194 new_stmt = gimple_build_assign
7195 (ptr, BIT_AND_EXPR, dataref_ptr,
7196 build_int_cst
7197 (TREE_TYPE (dataref_ptr),
7198 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7199 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7200 data_ref
7201 = build2 (MEM_REF, vectype, ptr,
7202 build_int_cst (reference_alias_ptr_type
7203 (DR_REF (first_dr)), 0));
7204 vec_dest = vect_create_destination_var (scalar_dest,
7205 vectype);
7206 new_stmt = gimple_build_assign (vec_dest, data_ref);
7207 new_temp = make_ssa_name (vec_dest, new_stmt);
7208 gimple_assign_set_lhs (new_stmt, new_temp);
7209 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7210 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7211 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7212 msq = new_temp;
7214 bump = size_binop (MULT_EXPR, vs,
7215 TYPE_SIZE_UNIT (elem_type));
7216 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7217 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7218 new_stmt = gimple_build_assign
7219 (NULL_TREE, BIT_AND_EXPR, ptr,
7220 build_int_cst
7221 (TREE_TYPE (ptr),
7222 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7223 ptr = copy_ssa_name (ptr, new_stmt);
7224 gimple_assign_set_lhs (new_stmt, ptr);
7225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7226 data_ref
7227 = build2 (MEM_REF, vectype, ptr,
7228 build_int_cst (reference_alias_ptr_type
7229 (DR_REF (first_dr)), 0));
7230 break;
7232 case dr_explicit_realign_optimized:
7233 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7234 new_temp = copy_ssa_name (dataref_ptr);
7235 else
7236 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7237 new_stmt = gimple_build_assign
7238 (new_temp, BIT_AND_EXPR, dataref_ptr,
7239 build_int_cst
7240 (TREE_TYPE (dataref_ptr),
7241 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7242 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7243 data_ref
7244 = build2 (MEM_REF, vectype, new_temp,
7245 build_int_cst (reference_alias_ptr_type
7246 (DR_REF (first_dr)), 0));
7247 break;
7248 default:
7249 gcc_unreachable ();
7251 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7252 new_stmt = gimple_build_assign (vec_dest, data_ref);
7253 new_temp = make_ssa_name (vec_dest, new_stmt);
7254 gimple_assign_set_lhs (new_stmt, new_temp);
7255 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7257 /* 3. Handle explicit realignment if necessary/supported.
7258 Create in loop:
7259 vec_dest = realign_load (msq, lsq, realignment_token) */
7260 if (alignment_support_scheme == dr_explicit_realign_optimized
7261 || alignment_support_scheme == dr_explicit_realign)
7263 lsq = gimple_assign_lhs (new_stmt);
7264 if (!realignment_token)
7265 realignment_token = dataref_ptr;
7266 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7267 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7268 msq, lsq, realignment_token);
7269 new_temp = make_ssa_name (vec_dest, new_stmt);
7270 gimple_assign_set_lhs (new_stmt, new_temp);
7271 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7273 if (alignment_support_scheme == dr_explicit_realign_optimized)
7275 gcc_assert (phi);
7276 if (i == vec_num - 1 && j == ncopies - 1)
7277 add_phi_arg (phi, lsq,
7278 loop_latch_edge (containing_loop),
7279 UNKNOWN_LOCATION);
7280 msq = lsq;
7284 /* 4. Handle invariant-load. */
7285 if (inv_p && !bb_vinfo)
7287 gcc_assert (!grouped_load);
7288 /* If we have versioned for aliasing or the loop doesn't
7289 have any data dependencies that would preclude this,
7290 then we are sure this is a loop invariant load and
7291 thus we can insert it on the preheader edge. */
7292 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7293 && !nested_in_vect_loop
7294 && hoist_defs_of_uses (stmt, loop))
7296 if (dump_enabled_p ())
7298 dump_printf_loc (MSG_NOTE, vect_location,
7299 "hoisting out of the vectorized "
7300 "loop: ");
7301 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7303 tree tem = copy_ssa_name (scalar_dest);
7304 gsi_insert_on_edge_immediate
7305 (loop_preheader_edge (loop),
7306 gimple_build_assign (tem,
7307 unshare_expr
7308 (gimple_assign_rhs1 (stmt))));
7309 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7310 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7311 set_vinfo_for_stmt (new_stmt,
7312 new_stmt_vec_info (new_stmt, vinfo));
7314 else
7316 gimple_stmt_iterator gsi2 = *gsi;
7317 gsi_next (&gsi2);
7318 new_temp = vect_init_vector (stmt, scalar_dest,
7319 vectype, &gsi2);
7320 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7324 if (negative)
7326 tree perm_mask = perm_mask_for_reverse (vectype);
7327 new_temp = permute_vec_elements (new_temp, new_temp,
7328 perm_mask, stmt, gsi);
7329 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7332 /* Collect vector loads and later create their permutation in
7333 vect_transform_grouped_load (). */
7334 if (grouped_load || slp_perm)
7335 dr_chain.quick_push (new_temp);
7337 /* Store vector loads in the corresponding SLP_NODE. */
7338 if (slp && !slp_perm)
7339 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7341 /* Bump the vector pointer to account for a gap or for excess
7342 elements loaded for a permuted SLP load. */
7343 if (group_gap_adj != 0)
7345 bool ovf;
7346 tree bump
7347 = wide_int_to_tree (sizetype,
7348 wi::smul (TYPE_SIZE_UNIT (elem_type),
7349 group_gap_adj, &ovf));
7350 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7351 stmt, bump);
7355 if (slp && !slp_perm)
7356 continue;
7358 if (slp_perm)
7360 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7361 slp_node_instance, false))
7363 dr_chain.release ();
7364 return false;
7367 else
7369 if (grouped_load)
7371 if (!load_lanes_p)
7372 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7373 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7375 else
7377 if (j == 0)
7378 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7379 else
7380 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7381 prev_stmt_info = vinfo_for_stmt (new_stmt);
7384 dr_chain.release ();
7387 return true;
7390 /* Function vect_is_simple_cond.
7392 Input:
7393 LOOP - the loop that is being vectorized.
7394 COND - Condition that is checked for simple use.
7396 Output:
7397 *COMP_VECTYPE - the vector type for the comparison.
7399 Returns whether a COND can be vectorized. Checks whether
7400 condition operands are supportable using vect_is_simple_use. */
7402 static bool
7403 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7405 tree lhs, rhs;
7406 enum vect_def_type dt;
7407 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7409 /* Mask case. */
7410 if (TREE_CODE (cond) == SSA_NAME
7411 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7413 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7414 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7415 &dt, comp_vectype)
7416 || !*comp_vectype
7417 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7418 return false;
7419 return true;
7422 if (!COMPARISON_CLASS_P (cond))
7423 return false;
7425 lhs = TREE_OPERAND (cond, 0);
7426 rhs = TREE_OPERAND (cond, 1);
7428 if (TREE_CODE (lhs) == SSA_NAME)
7430 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7431 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7432 return false;
7434 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7435 && TREE_CODE (lhs) != FIXED_CST)
7436 return false;
7438 if (TREE_CODE (rhs) == SSA_NAME)
7440 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7441 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7442 return false;
7444 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7445 && TREE_CODE (rhs) != FIXED_CST)
7446 return false;
7448 if (vectype1 && vectype2
7449 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7450 return false;
7452 *comp_vectype = vectype1 ? vectype1 : vectype2;
7453 return true;
7456 /* vectorizable_condition.
7458 Check if STMT is conditional modify expression that can be vectorized.
7459 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7460 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7461 at GSI.
7463 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7464 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7465 else clause if it is 2).
7467 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7469 bool
7470 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7471 gimple **vec_stmt, tree reduc_def, int reduc_index,
7472 slp_tree slp_node)
7474 tree scalar_dest = NULL_TREE;
7475 tree vec_dest = NULL_TREE;
7476 tree cond_expr, then_clause, else_clause;
7477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7478 tree comp_vectype = NULL_TREE;
7479 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7480 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7481 tree vec_compare;
7482 tree new_temp;
7483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7484 enum vect_def_type dt, dts[4];
7485 int ncopies;
7486 enum tree_code code;
7487 stmt_vec_info prev_stmt_info = NULL;
7488 int i, j;
7489 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7490 vec<tree> vec_oprnds0 = vNULL;
7491 vec<tree> vec_oprnds1 = vNULL;
7492 vec<tree> vec_oprnds2 = vNULL;
7493 vec<tree> vec_oprnds3 = vNULL;
7494 tree vec_cmp_type;
7495 bool masked = false;
7497 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7498 return false;
7500 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7502 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7503 return false;
7505 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7506 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7507 && reduc_def))
7508 return false;
7510 /* FORNOW: not yet supported. */
7511 if (STMT_VINFO_LIVE_P (stmt_info))
7513 if (dump_enabled_p ())
7514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7515 "value used after loop.\n");
7516 return false;
7520   /* Is this a vectorizable conditional operation?  */
7521 if (!is_gimple_assign (stmt))
7522 return false;
7524 code = gimple_assign_rhs_code (stmt);
7526 if (code != COND_EXPR)
7527 return false;
7529 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7530 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7531 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7533 if (slp_node || PURE_SLP_STMT (stmt_info))
7534 ncopies = 1;
7535 else
7536 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
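    /* Illustrative note (editorial addition): with a vectorization factor
       of 8 and vectors holding 4 elements, ncopies is 2, i.e. two copies
       of the vector statement are generated for each scalar statement.  */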
7538 gcc_assert (ncopies >= 1);
7539 if (reduc_index && ncopies > 1)
7540 return false; /* FORNOW */
7542 cond_expr = gimple_assign_rhs1 (stmt);
7543 then_clause = gimple_assign_rhs2 (stmt);
7544 else_clause = gimple_assign_rhs3 (stmt);
7546 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7547 || !comp_vectype)
7548 return false;
7550 gimple *def_stmt;
7551 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7552 &vectype1))
7553 return false;
7554 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7555 &vectype2))
7556 return false;
7558 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7559 return false;
7561 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7562 return false;
7564 masked = !COMPARISON_CLASS_P (cond_expr);
7565 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7567 if (vec_cmp_type == NULL_TREE)
7568 return false;
7570 if (!vec_stmt)
7572 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7573 return expand_vec_cond_expr_p (vectype, comp_vectype);
7576 /* Transform. */
7578 if (!slp_node)
7580 vec_oprnds0.create (1);
7581 vec_oprnds1.create (1);
7582 vec_oprnds2.create (1);
7583 vec_oprnds3.create (1);
7586 /* Handle def. */
7587 scalar_dest = gimple_assign_lhs (stmt);
7588 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7590 /* Handle cond expr. */
7591 for (j = 0; j < ncopies; j++)
7593 gassign *new_stmt = NULL;
7594 if (j == 0)
7596 if (slp_node)
7598 auto_vec<tree, 4> ops;
7599 auto_vec<vec<tree>, 4> vec_defs;
7601 if (masked)
7602 ops.safe_push (cond_expr);
7603 else
7605 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7606 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7608 ops.safe_push (then_clause);
7609 ops.safe_push (else_clause);
7610 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7611 vec_oprnds3 = vec_defs.pop ();
7612 vec_oprnds2 = vec_defs.pop ();
7613 if (!masked)
7614 vec_oprnds1 = vec_defs.pop ();
7615 vec_oprnds0 = vec_defs.pop ();
7617 ops.release ();
7618 vec_defs.release ();
7620 else
7622 gimple *gtemp;
7623 if (masked)
7625 vec_cond_lhs
7626 = vect_get_vec_def_for_operand (cond_expr, stmt,
7627 comp_vectype);
7628 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7629 &gtemp, &dts[0]);
7631 else
7633 vec_cond_lhs =
7634 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7635 stmt, comp_vectype);
7636 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7637 loop_vinfo, &gtemp, &dts[0]);
7639 vec_cond_rhs =
7640 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7641 stmt, comp_vectype);
7642 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7643 loop_vinfo, &gtemp, &dts[1]);
7645 if (reduc_index == 1)
7646 vec_then_clause = reduc_def;
7647 else
7649 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7650 stmt);
7651 vect_is_simple_use (then_clause, loop_vinfo,
7652 &gtemp, &dts[2]);
7654 if (reduc_index == 2)
7655 vec_else_clause = reduc_def;
7656 else
7658 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7659 stmt);
7660 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7664 else
7666 vec_cond_lhs
7667 = vect_get_vec_def_for_stmt_copy (dts[0],
7668 vec_oprnds0.pop ());
7669 if (!masked)
7670 vec_cond_rhs
7671 = vect_get_vec_def_for_stmt_copy (dts[1],
7672 vec_oprnds1.pop ());
7674 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7675 vec_oprnds2.pop ());
7676 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7677 vec_oprnds3.pop ());
7680 if (!slp_node)
7682 vec_oprnds0.quick_push (vec_cond_lhs);
7683 if (!masked)
7684 vec_oprnds1.quick_push (vec_cond_rhs);
7685 vec_oprnds2.quick_push (vec_then_clause);
7686 vec_oprnds3.quick_push (vec_else_clause);
7689 /* Arguments are ready. Create the new vector stmt. */
7690 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7692 vec_then_clause = vec_oprnds2[i];
7693 vec_else_clause = vec_oprnds3[i];
7695 if (masked)
7696 vec_compare = vec_cond_lhs;
7697 else
7699 vec_cond_rhs = vec_oprnds1[i];
7700 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7701 vec_cond_lhs, vec_cond_rhs);
7703 new_temp = make_ssa_name (vec_dest);
7704 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7705 vec_compare, vec_then_clause,
7706 vec_else_clause);
7707 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7708 if (slp_node)
7709 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7712 if (slp_node)
7713 continue;
7715 if (j == 0)
7716 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7717 else
7718 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7720 prev_stmt_info = vinfo_for_stmt (new_stmt);
7723 vec_oprnds0.release ();
7724 vec_oprnds1.release ();
7725 vec_oprnds2.release ();
7726 vec_oprnds3.release ();
7728 return true;
7731 /* vectorizable_comparison.
7733    Check if STMT is a comparison expression that can be vectorized.
7734 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7735 comparison, put it in VEC_STMT, and insert it at GSI.
7737 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
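/* Illustrative sketch (an editorial addition, not from the original
   sources): a scalar comparison such as

       flag_1 = a_2 < b_3;

   is vectorized into a mask-producing statement

       mask_4 = va_2 < vb_3;

   whose result type is a boolean vector (mask) type with the same number
   of elements as the operand vector type.  */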
7739 bool
7740 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7741 gimple **vec_stmt, tree reduc_def,
7742 slp_tree slp_node)
7744 tree lhs, rhs1, rhs2;
7745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7746 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7747 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7748 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7749 tree new_temp;
7750 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7751 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7752 unsigned nunits;
7753 int ncopies;
7754 enum tree_code code;
7755 stmt_vec_info prev_stmt_info = NULL;
7756 int i, j;
7757 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7758 vec<tree> vec_oprnds0 = vNULL;
7759 vec<tree> vec_oprnds1 = vNULL;
7760 gimple *def_stmt;
7761 tree mask_type;
7762 tree mask;
7764 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7765 return false;
7767 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7768 return false;
7770 mask_type = vectype;
7771 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7773 if (slp_node || PURE_SLP_STMT (stmt_info))
7774 ncopies = 1;
7775 else
7776 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7778 gcc_assert (ncopies >= 1);
7779 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7780 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7781 && reduc_def))
7782 return false;
7784 if (STMT_VINFO_LIVE_P (stmt_info))
7786 if (dump_enabled_p ())
7787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7788 "value used after loop.\n");
7789 return false;
7792 if (!is_gimple_assign (stmt))
7793 return false;
7795 code = gimple_assign_rhs_code (stmt);
7797 if (TREE_CODE_CLASS (code) != tcc_comparison)
7798 return false;
7800 rhs1 = gimple_assign_rhs1 (stmt);
7801 rhs2 = gimple_assign_rhs2 (stmt);
7803 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7804 &dts[0], &vectype1))
7805 return false;
7807 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7808 &dts[1], &vectype2))
7809 return false;
7811 if (vectype1 && vectype2
7812 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7813 return false;
7815 vectype = vectype1 ? vectype1 : vectype2;
7817 /* Invariant comparison. */
7818 if (!vectype)
7820 vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
7821 if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
7822 return false;
7824 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7825 return false;
7827 if (!vec_stmt)
7829 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7830 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7831 return expand_vec_cmp_expr_p (vectype, mask_type);
7834 /* Transform. */
7835 if (!slp_node)
7837 vec_oprnds0.create (1);
7838 vec_oprnds1.create (1);
7841 /* Handle def. */
7842 lhs = gimple_assign_lhs (stmt);
7843 mask = vect_create_destination_var (lhs, mask_type);
7845 /* Handle cmp expr. */
7846 for (j = 0; j < ncopies; j++)
7848 gassign *new_stmt = NULL;
7849 if (j == 0)
7851 if (slp_node)
7853 auto_vec<tree, 2> ops;
7854 auto_vec<vec<tree>, 2> vec_defs;
7856 ops.safe_push (rhs1);
7857 ops.safe_push (rhs2);
7858 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7859 vec_oprnds1 = vec_defs.pop ();
7860 vec_oprnds0 = vec_defs.pop ();
7862 else
7864 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7865 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7868 else
7870 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7871 vec_oprnds0.pop ());
7872 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7873 vec_oprnds1.pop ());
7876 if (!slp_node)
7878 vec_oprnds0.quick_push (vec_rhs1);
7879 vec_oprnds1.quick_push (vec_rhs2);
7882 /* Arguments are ready. Create the new vector stmt. */
7883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7885 vec_rhs2 = vec_oprnds1[i];
7887 new_temp = make_ssa_name (mask);
7888 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7889 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7890 if (slp_node)
7891 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7894 if (slp_node)
7895 continue;
7897 if (j == 0)
7898 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7899 else
7900 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7902 prev_stmt_info = vinfo_for_stmt (new_stmt);
7905 vec_oprnds0.release ();
7906 vec_oprnds1.release ();
7908 return true;
7911 /* Make sure the statement is vectorizable. */
7913 bool
7914 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7916 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7917 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7918 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7919 bool ok;
7920 tree scalar_type, vectype;
7921 gimple *pattern_stmt;
7922 gimple_seq pattern_def_seq;
7924 if (dump_enabled_p ())
7926 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7927 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7930 if (gimple_has_volatile_ops (stmt))
7932 if (dump_enabled_p ())
7933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7934 "not vectorized: stmt has volatile operands\n");
7936 return false;
7939 /* Skip stmts that do not need to be vectorized. In loops this is expected
7940 to include:
7941 - the COND_EXPR which is the loop exit condition
7942 - any LABEL_EXPRs in the loop
7943 - computations that are used only for array indexing or loop control.
7944 In basic blocks we only analyze statements that are a part of some SLP
7945 instance, therefore, all the statements are relevant.
7947      A pattern statement needs to be analyzed instead of the original statement
7948      if the original statement is not relevant.  Otherwise, we analyze both
7949      statements.  In basic blocks we are called from some SLP instance
7950      traversal, so we don't analyze pattern stmts here; the relevant pattern
7951      stmts will already be part of the SLP instance.  */
7953 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7954 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7955 && !STMT_VINFO_LIVE_P (stmt_info))
7957 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7958 && pattern_stmt
7959 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7960 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7962 /* Analyze PATTERN_STMT instead of the original stmt. */
7963 stmt = pattern_stmt;
7964 stmt_info = vinfo_for_stmt (pattern_stmt);
7965 if (dump_enabled_p ())
7967 dump_printf_loc (MSG_NOTE, vect_location,
7968 "==> examining pattern statement: ");
7969 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7972 else
7974 if (dump_enabled_p ())
7975 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7977 return true;
7980 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7981 && node == NULL
7982 && pattern_stmt
7983 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7984 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7986 /* Analyze PATTERN_STMT too. */
7987 if (dump_enabled_p ())
7989 dump_printf_loc (MSG_NOTE, vect_location,
7990 "==> examining pattern statement: ");
7991 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7994 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7995 return false;
7998 if (is_pattern_stmt_p (stmt_info)
7999 && node == NULL
8000 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8002 gimple_stmt_iterator si;
8004 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8006 gimple *pattern_def_stmt = gsi_stmt (si);
8007 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8008 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8010 /* Analyze def stmt of STMT if it's a pattern stmt. */
8011 if (dump_enabled_p ())
8013 dump_printf_loc (MSG_NOTE, vect_location,
8014 "==> examining pattern def statement: ");
8015 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8018 if (!vect_analyze_stmt (pattern_def_stmt,
8019 need_to_vectorize, node))
8020 return false;
8025 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8027 case vect_internal_def:
8028 break;
8030 case vect_reduction_def:
8031 case vect_nested_cycle:
8032 gcc_assert (!bb_vinfo
8033 && (relevance == vect_used_in_outer
8034 || relevance == vect_used_in_outer_by_reduction
8035 || relevance == vect_used_by_reduction
8036 || relevance == vect_unused_in_scope));
8037 break;
8039 case vect_induction_def:
8040 case vect_constant_def:
8041 case vect_external_def:
8042 case vect_unknown_def_type:
8043 default:
8044 gcc_unreachable ();
8047 if (bb_vinfo)
8049 gcc_assert (PURE_SLP_STMT (stmt_info));
8051 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8052 if (dump_enabled_p ())
8054 dump_printf_loc (MSG_NOTE, vect_location,
8055 "get vectype for scalar type: ");
8056 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8057 dump_printf (MSG_NOTE, "\n");
8060 vectype = get_vectype_for_scalar_type (scalar_type);
8061 if (!vectype)
8063 if (dump_enabled_p ())
8065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8066 "not SLPed: unsupported data-type ");
8067 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8068 scalar_type);
8069 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8071 return false;
8074 if (dump_enabled_p ())
8076 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8077 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8078 dump_printf (MSG_NOTE, "\n");
8081 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8084 if (STMT_VINFO_RELEVANT_P (stmt_info))
8086 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8087 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8088 || (is_gimple_call (stmt)
8089 && gimple_call_lhs (stmt) == NULL_TREE));
8090 *need_to_vectorize = true;
8093 if (PURE_SLP_STMT (stmt_info) && !node)
8095 dump_printf_loc (MSG_NOTE, vect_location,
8096 "handled only by SLP analysis\n");
8097 return true;
8100 ok = true;
8101 if (!bb_vinfo
8102 && (STMT_VINFO_RELEVANT_P (stmt_info)
8103 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8104 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8105 || vectorizable_conversion (stmt, NULL, NULL, node)
8106 || vectorizable_shift (stmt, NULL, NULL, node)
8107 || vectorizable_operation (stmt, NULL, NULL, node)
8108 || vectorizable_assignment (stmt, NULL, NULL, node)
8109 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8110 || vectorizable_call (stmt, NULL, NULL, node)
8111 || vectorizable_store (stmt, NULL, NULL, node)
8112 || vectorizable_reduction (stmt, NULL, NULL, node)
8113 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8114 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8115 else
8117 if (bb_vinfo)
8118 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8119 || vectorizable_conversion (stmt, NULL, NULL, node)
8120 || vectorizable_shift (stmt, NULL, NULL, node)
8121 || vectorizable_operation (stmt, NULL, NULL, node)
8122 || vectorizable_assignment (stmt, NULL, NULL, node)
8123 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8124 || vectorizable_call (stmt, NULL, NULL, node)
8125 || vectorizable_store (stmt, NULL, NULL, node)
8126 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8127 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8130 if (!ok)
8132 if (dump_enabled_p ())
8134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8135 "not vectorized: relevant stmt not ");
8136 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8137 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8140 return false;
8143 if (bb_vinfo)
8144 return true;
8146   /* Stmts that are (also) "live" (i.e. that are used outside the loop)
8147 need extra handling, except for vectorizable reductions. */
8148 if (STMT_VINFO_LIVE_P (stmt_info)
8149 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8150 ok = vectorizable_live_operation (stmt, NULL, NULL);
8152 if (!ok)
8154 if (dump_enabled_p ())
8156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8157 "not vectorized: live stmt not ");
8158 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8159 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8162 return false;
8165 return true;
8169 /* Function vect_transform_stmt.
8171    Create a vectorized stmt to replace STMT, and insert it at GSI.  */
8173 bool
8174 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8175 bool *grouped_store, slp_tree slp_node,
8176 slp_instance slp_node_instance)
8178 bool is_store = false;
8179 gimple *vec_stmt = NULL;
8180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8181 bool done;
8183 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8185 switch (STMT_VINFO_TYPE (stmt_info))
8187 case type_demotion_vec_info_type:
8188 case type_promotion_vec_info_type:
8189 case type_conversion_vec_info_type:
8190 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8191 gcc_assert (done);
8192 break;
8194 case induc_vec_info_type:
8195 gcc_assert (!slp_node);
8196 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8197 gcc_assert (done);
8198 break;
8200 case shift_vec_info_type:
8201 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8202 gcc_assert (done);
8203 break;
8205 case op_vec_info_type:
8206 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8207 gcc_assert (done);
8208 break;
8210 case assignment_vec_info_type:
8211 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8212 gcc_assert (done);
8213 break;
8215 case load_vec_info_type:
8216 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8217 slp_node_instance);
8218 gcc_assert (done);
8219 break;
8221 case store_vec_info_type:
8222 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8223 gcc_assert (done);
8224 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8226 /* In case of interleaving, the whole chain is vectorized when the
8227 last store in the chain is reached. Store stmts before the last
8228          one are skipped, and their vec_stmt_info shouldn't be freed
8229 meanwhile. */
8230 *grouped_store = true;
8231 if (STMT_VINFO_VEC_STMT (stmt_info))
8232 is_store = true;
8234 else
8235 is_store = true;
8236 break;
8238 case condition_vec_info_type:
8239 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8240 gcc_assert (done);
8241 break;
8243 case comparison_vec_info_type:
8244 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8245 gcc_assert (done);
8246 break;
8248 case call_vec_info_type:
8249 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8250 stmt = gsi_stmt (*gsi);
8251 if (is_gimple_call (stmt)
8252 && gimple_call_internal_p (stmt)
8253 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8254 is_store = true;
8255 break;
8257 case call_simd_clone_vec_info_type:
8258 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8259 stmt = gsi_stmt (*gsi);
8260 break;
8262 case reduc_vec_info_type:
8263 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8264 gcc_assert (done);
8265 break;
8267 default:
8268 if (!STMT_VINFO_LIVE_P (stmt_info))
8270 if (dump_enabled_p ())
8271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8272 "stmt not supported.\n");
8273 gcc_unreachable ();
8277 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8278 This would break hybrid SLP vectorization. */
8279 if (slp_node)
8280 gcc_assert (!vec_stmt
8281 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8283 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8284 is being vectorized, but outside the immediately enclosing loop. */
8285 if (vec_stmt
8286 && STMT_VINFO_LOOP_VINFO (stmt_info)
8287 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8288 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8289 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8290 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8291 || STMT_VINFO_RELEVANT (stmt_info) ==
8292 vect_used_in_outer_by_reduction))
8294 struct loop *innerloop = LOOP_VINFO_LOOP (
8295 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8296 imm_use_iterator imm_iter;
8297 use_operand_p use_p;
8298 tree scalar_dest;
8299 gimple *exit_phi;
8301 if (dump_enabled_p ())
8302 dump_printf_loc (MSG_NOTE, vect_location,
8303 "Record the vdef for outer-loop vectorization.\n");
8305       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8306 (to be used when vectorizing outer-loop stmts that use the DEF of
8307 STMT). */
8308 if (gimple_code (stmt) == GIMPLE_PHI)
8309 scalar_dest = PHI_RESULT (stmt);
8310 else
8311 scalar_dest = gimple_assign_lhs (stmt);
8313 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8315 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8317 exit_phi = USE_STMT (use_p);
8318 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8323 /* Handle stmts whose DEF is used outside the loop-nest that is
8324 being vectorized. */
8325 if (STMT_VINFO_LIVE_P (stmt_info)
8326 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8328 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
8329 gcc_assert (done);
8332 if (vec_stmt)
8333 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8335 return is_store;
8339 /* Remove a group of stores (for SLP or interleaving), free their
8340 stmt_vec_info. */
8342 void
8343 vect_remove_stores (gimple *first_stmt)
8345 gimple *next = first_stmt;
8346 gimple *tmp;
8347 gimple_stmt_iterator next_si;
8349 while (next)
8351 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8353 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8354 if (is_pattern_stmt_p (stmt_info))
8355 next = STMT_VINFO_RELATED_STMT (stmt_info);
8356 /* Free the attached stmt_vec_info and remove the stmt. */
8357 next_si = gsi_for_stmt (next);
8358 unlink_stmt_vdef (next);
8359 gsi_remove (&next_si, true);
8360 release_defs (next);
8361 free_stmt_vec_info (next);
8362 next = tmp;
8367 /* Function new_stmt_vec_info.
8369 Create and initialize a new stmt_vec_info struct for STMT. */
8371 stmt_vec_info
8372 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8374 stmt_vec_info res;
8375 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8377 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8378 STMT_VINFO_STMT (res) = stmt;
8379 res->vinfo = vinfo;
8380 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8381 STMT_VINFO_LIVE_P (res) = false;
8382 STMT_VINFO_VECTYPE (res) = NULL;
8383 STMT_VINFO_VEC_STMT (res) = NULL;
8384 STMT_VINFO_VECTORIZABLE (res) = true;
8385 STMT_VINFO_IN_PATTERN_P (res) = false;
8386 STMT_VINFO_RELATED_STMT (res) = NULL;
8387 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8388 STMT_VINFO_DATA_REF (res) = NULL;
8389 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8391 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8392 STMT_VINFO_DR_OFFSET (res) = NULL;
8393 STMT_VINFO_DR_INIT (res) = NULL;
8394 STMT_VINFO_DR_STEP (res) = NULL;
8395 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8397 if (gimple_code (stmt) == GIMPLE_PHI
8398 && is_loop_header_bb_p (gimple_bb (stmt)))
8399 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8400 else
8401 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8403 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8404 STMT_SLP_TYPE (res) = loop_vect;
8405 STMT_VINFO_NUM_SLP_USES (res) = 0;
8407 GROUP_FIRST_ELEMENT (res) = NULL;
8408 GROUP_NEXT_ELEMENT (res) = NULL;
8409 GROUP_SIZE (res) = 0;
8410 GROUP_STORE_COUNT (res) = 0;
8411 GROUP_GAP (res) = 0;
8412 GROUP_SAME_DR_STMT (res) = NULL;
8414 return res;
8418  /* Create the vector used to map stmts to their stmt_vec_info.  */
8420 void
8421 init_stmt_vec_info_vec (void)
8423 gcc_assert (!stmt_vec_info_vec.exists ());
8424 stmt_vec_info_vec.create (50);
8428  /* Free the vector used to map stmts to their stmt_vec_info.  */
8430 void
8431 free_stmt_vec_info_vec (void)
8433 unsigned int i;
8434 stmt_vec_info info;
8435 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8436 if (info != NULL)
8437 free_stmt_vec_info (STMT_VINFO_STMT (info));
8438 gcc_assert (stmt_vec_info_vec.exists ());
8439 stmt_vec_info_vec.release ();
8443 /* Free stmt vectorization related info. */
8445 void
8446 free_stmt_vec_info (gimple *stmt)
8448 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8450 if (!stmt_info)
8451 return;
8453 /* Check if this statement has a related "pattern stmt"
8454 (introduced by the vectorizer during the pattern recognition
8455 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8456 too. */
8457 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8459 stmt_vec_info patt_info
8460 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8461 if (patt_info)
8463 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8464 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8465 gimple_set_bb (patt_stmt, NULL);
8466 tree lhs = gimple_get_lhs (patt_stmt);
8467 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8468 release_ssa_name (lhs);
8469 if (seq)
8471 gimple_stmt_iterator si;
8472 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8474 gimple *seq_stmt = gsi_stmt (si);
8475 gimple_set_bb (seq_stmt, NULL);
8476 lhs = gimple_get_lhs (seq_stmt);
8477 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8478 release_ssa_name (lhs);
8479 free_stmt_vec_info (seq_stmt);
8482 free_stmt_vec_info (patt_stmt);
8486 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8487 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8488 set_vinfo_for_stmt (stmt, NULL);
8489 free (stmt_info);
8493 /* Function get_vectype_for_scalar_type_and_size.
8495 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8496 by the target. */
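/* Illustrative example (editorial addition): for SCALAR_TYPE int (4 bytes)
   and SIZE 16, a vector type with 16 / 4 = 4 elements is built, i.e.
   vector(4) int; when SIZE is 0, the target's preferred SIMD mode for the
   element mode determines the number of elements instead.  */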
8498 static tree
8499 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8501 machine_mode inner_mode = TYPE_MODE (scalar_type);
8502 machine_mode simd_mode;
8503 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8504 int nunits;
8505 tree vectype;
8507 if (nbytes == 0)
8508 return NULL_TREE;
8510 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8511 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8512 return NULL_TREE;
8514 /* For vector types of elements whose mode precision doesn't
8515      match their type's precision we use an element type of mode
8516 precision. The vectorization routines will have to make sure
8517 they support the proper result truncation/extension.
8518 We also make sure to build vector types with INTEGER_TYPE
8519 component type only. */
8520 if (INTEGRAL_TYPE_P (scalar_type)
8521 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8522 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8523 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8524 TYPE_UNSIGNED (scalar_type));
8526 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8527 When the component mode passes the above test simply use a type
8528 corresponding to that mode. The theory is that any use that
8529 would cause problems with this will disable vectorization anyway. */
8530 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8531 && !INTEGRAL_TYPE_P (scalar_type))
8532 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8534 /* We can't build a vector type of elements with alignment bigger than
8535 their size. */
8536 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8537 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8538 TYPE_UNSIGNED (scalar_type));
8540   /* If we fell back to using the mode, fail if there was
8541 no scalar type for it. */
8542 if (scalar_type == NULL_TREE)
8543 return NULL_TREE;
8545   /* If no size was supplied, use the mode the target prefers.   Otherwise
8546      look up a vector mode of the specified size.  */
8547 if (size == 0)
8548 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8549 else
8550 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8551 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8552 if (nunits <= 1)
8553 return NULL_TREE;
8555 vectype = build_vector_type (scalar_type, nunits);
8557 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8558 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8559 return NULL_TREE;
8561 return vectype;
8564 unsigned int current_vector_size;
8566 /* Function get_vectype_for_scalar_type.
8568 Returns the vector type corresponding to SCALAR_TYPE as supported
8569 by the target. */
8571 tree
8572 get_vectype_for_scalar_type (tree scalar_type)
8574 tree vectype;
8575 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8576 current_vector_size);
8577 if (vectype
8578 && current_vector_size == 0)
8579 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8580 return vectype;
8583 /* Function get_mask_type_for_scalar_type.
8585    Returns the mask type corresponding to the result of a comparison
8586    of vectors of the specified SCALAR_TYPE, as supported by the target.  */
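/* Illustrative example (editorial addition): with current_vector_size 16
   and SCALAR_TYPE int, the data vector has 4 elements, so the returned
   mask type is a 4-element boolean vector (its layout depends on how the
   target represents vector masks).  */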
8588 tree
8589 get_mask_type_for_scalar_type (tree scalar_type)
8591 tree vectype = get_vectype_for_scalar_type (scalar_type);
8593 if (!vectype)
8594 return NULL;
8596 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8597 current_vector_size);
8600 /* Function get_same_sized_vectype
8602    Returns a vector type corresponding to SCALAR_TYPE with the same
8603    size as VECTOR_TYPE, if supported by the target.  */
8605 tree
8606 get_same_sized_vectype (tree scalar_type, tree vector_type)
8608 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8609 return build_same_sized_truth_vector_type (vector_type);
8611 return get_vectype_for_scalar_type_and_size
8612 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8615 /* Function vect_is_simple_use.
8617 Input:
8618 VINFO - the vect info of the loop or basic block that is being vectorized.
8619 OPERAND - operand in the loop or bb.
8620 Output:
8621 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8622 DT - the type of definition
8624 Returns whether a stmt with OPERAND can be vectorized.
8625 For loops, supportable operands are constants, loop invariants, and operands
8626 that are defined by the current iteration of the loop. Unsupportable
8627 operands are those that are defined by a previous iteration of the loop (as
8628 is the case in reduction/induction computations).
8629 For basic blocks, supportable operands are constants and bb invariants.
8630 For now, operands defined outside the basic block are not supported. */
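/* Illustrative example (an editorial addition, not from the original
   sources): in a loop

       for (i = 0; i < n; i++)
         a[i] = b[i] * c_1 + 5;

   the constant 5 classifies as vect_constant_def, the loop-invariant c_1
   as vect_external_def, and the SSA name holding b[i] * c_1 (defined in
   the current iteration) as vect_internal_def.  */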
8632 bool
8633 vect_is_simple_use (tree operand, vec_info *vinfo,
8634 gimple **def_stmt, enum vect_def_type *dt)
8636 *def_stmt = NULL;
8637 *dt = vect_unknown_def_type;
8639 if (dump_enabled_p ())
8641 dump_printf_loc (MSG_NOTE, vect_location,
8642 "vect_is_simple_use: operand ");
8643 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8644 dump_printf (MSG_NOTE, "\n");
8647 if (CONSTANT_CLASS_P (operand))
8649 *dt = vect_constant_def;
8650 return true;
8653 if (is_gimple_min_invariant (operand))
8655 *dt = vect_external_def;
8656 return true;
8659 if (TREE_CODE (operand) != SSA_NAME)
8661 if (dump_enabled_p ())
8662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8663 "not ssa-name.\n");
8664 return false;
8667 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8669 *dt = vect_external_def;
8670 return true;
8673 *def_stmt = SSA_NAME_DEF_STMT (operand);
8674 if (dump_enabled_p ())
8676 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8680 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8681 *dt = vect_external_def;
8682 else
8684 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8685 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8688 if (dump_enabled_p ())
8690 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8691 switch (*dt)
8693 case vect_uninitialized_def:
8694 dump_printf (MSG_NOTE, "uninitialized\n");
8695 break;
8696 case vect_constant_def:
8697 dump_printf (MSG_NOTE, "constant\n");
8698 break;
8699 case vect_external_def:
8700 dump_printf (MSG_NOTE, "external\n");
8701 break;
8702 case vect_internal_def:
8703 dump_printf (MSG_NOTE, "internal\n");
8704 break;
8705 case vect_induction_def:
8706 dump_printf (MSG_NOTE, "induction\n");
8707 break;
8708 case vect_reduction_def:
8709 dump_printf (MSG_NOTE, "reduction\n");
8710 break;
8711 case vect_double_reduction_def:
8712 dump_printf (MSG_NOTE, "double reduction\n");
8713 break;
8714 case vect_nested_cycle:
8715 dump_printf (MSG_NOTE, "nested cycle\n");
8716 break;
8717 case vect_unknown_def_type:
8718 dump_printf (MSG_NOTE, "unknown\n");
8719 break;
8723 if (*dt == vect_unknown_def_type)
8725 if (dump_enabled_p ())
8726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8727 "Unsupported pattern.\n");
8728 return false;
8731 switch (gimple_code (*def_stmt))
8733 case GIMPLE_PHI:
8734 case GIMPLE_ASSIGN:
8735 case GIMPLE_CALL:
8736 break;
8737 default:
8738 if (dump_enabled_p ())
8739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8740 "unsupported defining stmt:\n");
8741 return false;
8744 return true;
8747 /* Function vect_is_simple_use.
8749 Same as vect_is_simple_use but also determines the vector operand
8750 type of OPERAND and stores it to *VECTYPE. If the definition of
8751 OPERAND is vect_uninitialized_def, vect_constant_def or
8752 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8753    is responsible for computing the best suited vector type for the
8754 scalar operand. */
8756 bool
8757 vect_is_simple_use (tree operand, vec_info *vinfo,
8758 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8760 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8761 return false;
8763 /* Now get a vector type if the def is internal, otherwise supply
8764 NULL_TREE and leave it up to the caller to figure out a proper
8765 type for the use stmt. */
8766 if (*dt == vect_internal_def
8767 || *dt == vect_induction_def
8768 || *dt == vect_reduction_def
8769 || *dt == vect_double_reduction_def
8770 || *dt == vect_nested_cycle)
8772 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8774 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8775 && !STMT_VINFO_RELEVANT (stmt_info)
8776 && !STMT_VINFO_LIVE_P (stmt_info))
8777 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8779 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8780 gcc_assert (*vectype != NULL_TREE);
8782 else if (*dt == vect_uninitialized_def
8783 || *dt == vect_constant_def
8784 || *dt == vect_external_def)
8785 *vectype = NULL_TREE;
8786 else
8787 gcc_unreachable ();
8789 return true;
8793 /* Function supportable_widening_operation
8795 Check whether an operation represented by the code CODE is a
8796 widening operation that is supported by the target platform in
8797 vector form (i.e., when operating on arguments of type VECTYPE_IN
8798 producing a result of type VECTYPE_OUT).
8800 Widening operations we currently support are NOP (CONVERT), FLOAT
8801 and WIDEN_MULT. This function checks if these operations are supported
8802 by the target platform either directly (via vector tree-codes), or via
8803 target builtins.
8805 Output:
8806 - CODE1 and CODE2 are codes of vector operations to be used when
8807 vectorizing the operation, if available.
8808 - MULTI_STEP_CVT determines the number of required intermediate steps in
8809 case of multi-step conversion (like char->short->int - in that case
8810 MULTI_STEP_CVT will be 1).
8811 - INTERM_TYPES contains the intermediate type required to perform the
8812 widening operation (short in the above example). */
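/* Illustrative example (editorial addition): widening chars to ints on a
   target that only provides char->short and short->int unpacking needs one
   intermediate step: CODE1/CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type.  */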
8814 bool
8815 supportable_widening_operation (enum tree_code code, gimple *stmt,
8816 tree vectype_out, tree vectype_in,
8817 enum tree_code *code1, enum tree_code *code2,
8818 int *multi_step_cvt,
8819 vec<tree> *interm_types)
8821 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8822 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8823 struct loop *vect_loop = NULL;
8824 machine_mode vec_mode;
8825 enum insn_code icode1, icode2;
8826 optab optab1, optab2;
8827 tree vectype = vectype_in;
8828 tree wide_vectype = vectype_out;
8829 enum tree_code c1, c2;
8830 int i;
8831 tree prev_type, intermediate_type;
8832 machine_mode intermediate_mode, prev_mode;
8833 optab optab3, optab4;
8835 *multi_step_cvt = 0;
8836 if (loop_info)
8837 vect_loop = LOOP_VINFO_LOOP (loop_info);
8839 switch (code)
8841 case WIDEN_MULT_EXPR:
8842 /* The result of a vectorized widening operation usually requires
8843 two vectors (because the widened results do not fit into one vector).
8844 The generated vector results would normally be expected to be
8845 generated in the same order as in the original scalar computation,
8846 i.e. if 8 results are generated in each vector iteration, they are
8847 to be organized as follows:
8848 vect1: [res1,res2,res3,res4],
8849 vect2: [res5,res6,res7,res8].
8851 However, in the special case that the result of the widening
8852 operation is used in a reduction computation only, the order doesn't
8853 matter (because when vectorizing a reduction we change the order of
8854 the computation). Some targets can take advantage of this and
8855 generate more efficient code. For example, targets like Altivec,
8856 that support widen_mult using a sequence of {mult_even,mult_odd}
8857 generate the following vectors:
8858 vect1: [res1,res3,res5,res7],
8859 vect2: [res2,res4,res6,res8].
8861 When vectorizing outer-loops, we execute the inner-loop sequentially
8862 (each vectorized inner-loop iteration contributes to VF outer-loop
8863         iterations in parallel).  We therefore don't allow changing the
8864 order of the computation in the inner-loop during outer-loop
8865 vectorization. */
8866 /* TODO: Another case in which order doesn't *really* matter is when we
8867 widen and then contract again, e.g. (short)((int)x * y >> 8).
8868 Normally, pack_trunc performs an even/odd permute, whereas the
8869 repack from an even/odd expansion would be an interleave, which
8870 would be significantly simpler for e.g. AVX2. */
8871 /* In any case, in order to avoid duplicating the code below, recurse
8872 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8873 are properly set up for the caller. If we fail, we'll continue with
8874 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8875 if (vect_loop
8876 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8877 && !nested_in_vect_loop_p (vect_loop, stmt)
8878 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8879 stmt, vectype_out, vectype_in,
8880 code1, code2, multi_step_cvt,
8881 interm_types))
8883          /* Elements in a vector with the vect_used_by_reduction property cannot
8884             be reordered if the use chain with this property does not have the
8885             same operation.  One such example is s += a * b, where elements
8886 in a and b cannot be reordered. Here we check if the vector defined
8887 by STMT is only directly used in the reduction statement. */
8888 tree lhs = gimple_assign_lhs (stmt);
8889 use_operand_p dummy;
8890 gimple *use_stmt;
8891 stmt_vec_info use_stmt_info = NULL;
8892 if (single_imm_use (lhs, &dummy, &use_stmt)
8893 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8894 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8895 return true;
8897 c1 = VEC_WIDEN_MULT_LO_EXPR;
8898 c2 = VEC_WIDEN_MULT_HI_EXPR;
8899 break;
8901 case DOT_PROD_EXPR:
8902 c1 = DOT_PROD_EXPR;
8903 c2 = DOT_PROD_EXPR;
8904 break;
8906 case SAD_EXPR:
8907 c1 = SAD_EXPR;
8908 c2 = SAD_EXPR;
8909 break;
8911 case VEC_WIDEN_MULT_EVEN_EXPR:
8912 /* Support the recursion induced just above. */
8913 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8914 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8915 break;
8917 case WIDEN_LSHIFT_EXPR:
8918 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8919 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8920 break;
8922 CASE_CONVERT:
8923 c1 = VEC_UNPACK_LO_EXPR;
8924 c2 = VEC_UNPACK_HI_EXPR;
8925 break;
8927 case FLOAT_EXPR:
8928 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8929 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8930 break;
8932 case FIX_TRUNC_EXPR:
8933 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8934 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8935 computing the operation. */
8936 return false;
8938 default:
8939 gcc_unreachable ();
8942 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8943 std::swap (c1, c2);
8945 if (code == FIX_TRUNC_EXPR)
8947       /* The signedness is determined from the output operand.  */
8948 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8949 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8951 else
8953 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8954 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8957 if (!optab1 || !optab2)
8958 return false;
8960 vec_mode = TYPE_MODE (vectype);
8961 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8962 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8963 return false;
8965 *code1 = c1;
8966 *code2 = c2;
8968 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8969 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8970 return true;
8972 /* Check if it's a multi-step conversion that can be done using intermediate
8973 types. */
8975 prev_type = vectype;
8976 prev_mode = vec_mode;
8978 if (!CONVERT_EXPR_CODE_P (code))
8979 return false;
8981   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8982      intermediate steps in the promotion sequence.  We try
8983      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8984      not.  */
8985 interm_types->create (MAX_INTERM_CVT_STEPS);
8986 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8988 intermediate_mode = insn_data[icode1].operand[0].mode;
8989 intermediate_type
8990 = lang_hooks.types.type_for_mode (intermediate_mode,
8991 TYPE_UNSIGNED (prev_type));
8992 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8993 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8995 if (!optab3 || !optab4
8996 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8997 || insn_data[icode1].operand[0].mode != intermediate_mode
8998 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8999 || insn_data[icode2].operand[0].mode != intermediate_mode
9000 || ((icode1 = optab_handler (optab3, intermediate_mode))
9001 == CODE_FOR_nothing)
9002 || ((icode2 = optab_handler (optab4, intermediate_mode))
9003 == CODE_FOR_nothing))
9004 break;
9006 interm_types->quick_push (intermediate_type);
9007 (*multi_step_cvt)++;
9009 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9010 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9011 return true;
9013 prev_type = intermediate_type;
9014 prev_mode = intermediate_mode;
9017 interm_types->release ();
9018 return false;
9022 /* Function supportable_narrowing_operation
9024 Check whether an operation represented by the code CODE is a
9025 narrowing operation that is supported by the target platform in
9026 vector form (i.e., when operating on arguments of type VECTYPE_IN
9027 and producing a result of type VECTYPE_OUT).
9029 Narrowing operations we currently support are NOP (CONVERT) and
9030 FIX_TRUNC. This function checks if these operations are supported by
9031 the target platform directly via vector tree-codes.
9033 Output:
9034 - CODE1 is the code of a vector operation to be used when
9035 vectorizing the operation, if available.
9036 - MULTI_STEP_CVT determines the number of required intermediate steps in
9037 case of multi-step conversion (like int->short->char - in that case
9038 MULTI_STEP_CVT will be 1).
9039 - INTERM_TYPES contains the intermediate type required to perform the
9040 narrowing operation (short in the above example). */
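/* Illustrative example (editorial addition): narrowing ints to chars on a
   target that only packs int->short and short->char needs one intermediate
   step: CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES
   holds the short vector type, mirroring the int->short->char example
   above.  */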
9042 bool
9043 supportable_narrowing_operation (enum tree_code code,
9044 tree vectype_out, tree vectype_in,
9045 enum tree_code *code1, int *multi_step_cvt,
9046 vec<tree> *interm_types)
9048 machine_mode vec_mode;
9049 enum insn_code icode1;
9050 optab optab1, interm_optab;
9051 tree vectype = vectype_in;
9052 tree narrow_vectype = vectype_out;
9053 enum tree_code c1;
9054 tree intermediate_type;
9055 machine_mode intermediate_mode, prev_mode;
9056 int i;
9057 bool uns;
9059 *multi_step_cvt = 0;
9060 switch (code)
9062 CASE_CONVERT:
9063 c1 = VEC_PACK_TRUNC_EXPR;
9064 break;
9066 case FIX_TRUNC_EXPR:
9067 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9068 break;
9070 case FLOAT_EXPR:
9071 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9072 tree code and optabs used for computing the operation. */
9073 return false;
9075 default:
9076 gcc_unreachable ();
9079 if (code == FIX_TRUNC_EXPR)
9080     /* The signedness is determined from the output operand.  */
9081 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9082 else
9083 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9085 if (!optab1)
9086 return false;
9088 vec_mode = TYPE_MODE (vectype);
9089 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9090 return false;
9092 *code1 = c1;
9094 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9095 return true;
9097 /* Check if it's a multi-step conversion that can be done using intermediate
9098 types. */
9099 prev_mode = vec_mode;
9100 if (code == FIX_TRUNC_EXPR)
9101 uns = TYPE_UNSIGNED (vectype_out);
9102 else
9103 uns = TYPE_UNSIGNED (vectype);
9105 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9106 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9107 costly than signed. */
9108 if (code == FIX_TRUNC_EXPR && uns)
9110 enum insn_code icode2;
9112 intermediate_type
9113 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9114 interm_optab
9115 = optab_for_tree_code (c1, intermediate_type, optab_default);
9116 if (interm_optab != unknown_optab
9117 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9118 && insn_data[icode1].operand[0].mode
9119 == insn_data[icode2].operand[0].mode)
9121 uns = false;
9122 optab1 = interm_optab;
9123 icode1 = icode2;
9127   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9128      intermediate steps in the narrowing sequence.  We try
9129      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
9130 interm_types->create (MAX_INTERM_CVT_STEPS);
9131 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9133 intermediate_mode = insn_data[icode1].operand[0].mode;
9134 intermediate_type
9135 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9136 interm_optab
9137 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9138 optab_default);
9139 if (!interm_optab
9140 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9141 || insn_data[icode1].operand[0].mode != intermediate_mode
9142 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9143 == CODE_FOR_nothing))
9144 break;
9146 interm_types->quick_push (intermediate_type);
9147 (*multi_step_cvt)++;
9149 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9150 return true;
9152 prev_mode = intermediate_mode;
9153 optab1 = interm_optab;
9156 interm_types->release ();
9157 return false;