/* Source provenance (scraped page header): [official-gcc.git] gcc/tree-vectorizer.c,
   blob 06b74036e2cdbf9174163a5d537bd4f701f6a9ee, from commit
   "linux-unwind.h: Add exception clause to copyright."  */
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26    simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "input.h"
147 #include "tree-vectorizer.h"
148 #include "tree-pass.h"
149 #include "langhooks.h"
152 /*************************************************************************
153 Simple Loop Peeling Utilities
154 *************************************************************************/
156 /* Entry point for peeling of simple loops.
157 Peel the first/last iterations of a loop.
158 It can be used outside of the vectorizer for loops that are simple enough
159 (see function documentation). In the vectorizer it is used to peel the
160 last few iterations when the loop bound is unknown or does not evenly
161 divide by the vectorization factor, and to peel the first few iterations
162 to force the alignment of data references in the loop. */
163 struct loop *slpeel_tree_peel_loop_to_edge
164 (struct loop *, struct loops *, edge, tree, tree, bool);
165 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
166 (struct loop *, struct loops *, edge);
167 static void slpeel_update_phis_for_duplicate_loop
168 (struct loop *, struct loop *, bool after);
169 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
170 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
171 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
172 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
173 static void allocate_new_names (bitmap);
174 static void rename_use_op (use_operand_p);
175 static void rename_def_op (def_operand_p, tree);
176 static void rename_variables_in_bb (basic_block);
177 static void free_new_names (bitmap);
178 static void rename_variables_in_loop (struct loop *);
179 #ifdef ENABLE_CHECKING
180 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
181 #endif
182 static LOC find_loop_location (struct loop *);
185 /*************************************************************************
186 Vectorization Utilities.
187 *************************************************************************/
189 /* Main analysis functions. */
190 static loop_vec_info vect_analyze_loop (struct loop *);
191 static loop_vec_info vect_analyze_loop_form (struct loop *);
192 static bool vect_analyze_data_refs (loop_vec_info);
193 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
194 static bool vect_analyze_scalar_cycles (loop_vec_info);
195 static bool vect_analyze_data_ref_accesses (loop_vec_info);
196 static bool vect_analyze_data_ref_dependence
197 (struct data_reference *, struct data_reference *, loop_vec_info);
198 static bool vect_analyze_data_ref_dependences (loop_vec_info);
199 static bool vect_analyze_data_refs_alignment (loop_vec_info);
200 static bool vect_compute_data_refs_alignment (loop_vec_info);
201 static bool vect_analyze_operations (loop_vec_info);
203 /* Main code transformation functions. */
204 static void vect_transform_loop (loop_vec_info, struct loops *);
205 static bool vect_transform_stmt (tree, block_stmt_iterator *);
206 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
207 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
208 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
209 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
210 static enum dr_alignment_support vect_supportable_dr_alignment
211 (struct data_reference *);
212 static void vect_align_data_ref (tree);
213 static void vect_enhance_data_refs_alignment (loop_vec_info);
215 /* Utility functions for the analyses. */
216 static bool vect_is_simple_use (tree , loop_vec_info, tree *);
217 static bool exist_non_indexing_operands_for_use_p (tree, tree);
218 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
219 static void vect_mark_relevant (varray_type *, tree);
220 static bool vect_stmt_relevant_p (tree, loop_vec_info);
221 static tree vect_get_loop_niters (struct loop *, tree *);
222 static bool vect_compute_data_ref_alignment (struct data_reference *);
223 static bool vect_analyze_data_ref_access (struct data_reference *);
224 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
225 static struct data_reference * vect_analyze_pointer_ref_access
226 (tree, tree, bool);
227 static bool vect_can_advance_ivs_p (loop_vec_info);
228 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
229 loop_vec_info, tree *, tree *, tree *,
230 bool*);
231 static struct data_reference * vect_analyze_pointer_ref_access
232 (tree, tree, bool);
233 static tree vect_get_ptr_offset (tree, tree, tree *);
234 static tree vect_get_memtag_and_dr
235 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
236 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
237 tree *, tree *);
238 static tree vect_strip_conversion (tree);
240 /* Utility functions for the code transformation. */
241 static tree vect_create_destination_var (tree, tree);
242 static tree vect_create_data_ref_ptr
243 (tree, block_stmt_iterator *, tree, tree *, bool);
244 static tree vect_create_index_for_vector_ref (loop_vec_info);
245 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
246 static tree get_vectype_for_scalar_type (tree);
247 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
248 static tree vect_get_vec_def_for_operand (tree, tree);
249 static tree vect_init_vector (tree, tree);
250 static void vect_finish_stmt_generation
251 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
253 /* Utility function dealing with loop peeling (not peeling itself). */
254 static void vect_generate_tmps_on_preheader
255 (loop_vec_info, tree *, tree *, tree *);
256 static tree vect_build_loop_niters (loop_vec_info);
257 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
258 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
259 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
260 static void vect_update_inits_of_drs (loop_vec_info, tree);
261 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
262 static void vect_do_peeling_for_loop_bound
263 (loop_vec_info, tree *, struct loops *);
265 /* Utilities for creation and deletion of vec_info structs. */
266 loop_vec_info new_loop_vec_info (struct loop *loop);
267 void destroy_loop_vec_info (loop_vec_info);
268 stmt_vec_info new_stmt_vec_info (tree, loop_vec_info);
270 /*************************************************************************
271 Vectorization Debug Information.
272 *************************************************************************/
274 /* vect_verbosity_level set to invalid verbosity level to mark that it's
275 uninitialized. */
276 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
278 /* vect_dump will be set to stderr or dump_file if exist. */
279 FILE *vect_dump;
281 /* Utilities for output formatting. */
282 static bool vect_print_dump_info (enum verbosity_levels, LOC);
283 static void vect_set_dump_settings (void);
284 void vect_set_verbosity_level (const char *);
288 /*************************************************************************
289 Simple Loop Peeling Utilities
291 Utilities to support loop peeling for vectorization purposes.
292 *************************************************************************/
295 /* For each definition in DEFINITIONS this function allocates
296 new ssa name. */
298 static void
299 allocate_new_names (bitmap definitions)
301 unsigned ver;
302 bitmap_iterator bi;
304 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
306 tree def = ssa_name (ver);
307 tree *new_name_ptr = xmalloc (sizeof (tree));
309 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
311 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
312 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
314 SSA_NAME_AUX (def) = new_name_ptr;
319 /* Renames the use *OP_P. */
321 static void
322 rename_use_op (use_operand_p op_p)
324 tree *new_name_ptr;
326 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
327 return;
329 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
331 /* Something defined outside of the loop. */
332 if (!new_name_ptr)
333 return;
335 /* An ordinary ssa name defined in the loop. */
337 SET_USE (op_p, *new_name_ptr);
341 /* Renames the def *OP_P in statement STMT. */
343 static void
344 rename_def_op (def_operand_p op_p, tree stmt)
346 tree *new_name_ptr;
348 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
349 return;
351 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
353 /* Something defined outside of the loop. */
354 if (!new_name_ptr)
355 return;
357 /* An ordinary ssa name defined in the loop. */
359 SET_DEF (op_p, *new_name_ptr);
360 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
364 /* Renames the variables in basic block BB. */
366 static void
367 rename_variables_in_bb (basic_block bb)
369 tree phi;
370 block_stmt_iterator bsi;
371 tree stmt;
372 stmt_ann_t ann;
373 use_optype uses;
374 vuse_optype vuses;
375 def_optype defs;
376 v_may_def_optype v_may_defs;
377 v_must_def_optype v_must_defs;
378 unsigned i;
379 edge e;
380 edge_iterator ei;
381 struct loop *loop = bb->loop_father;
383 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
384 rename_def_op (PHI_RESULT_PTR (phi), phi);
386 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
388 stmt = bsi_stmt (bsi);
389 get_stmt_operands (stmt);
390 ann = stmt_ann (stmt);
392 uses = USE_OPS (ann);
393 for (i = 0; i < NUM_USES (uses); i++)
394 rename_use_op (USE_OP_PTR (uses, i));
396 defs = DEF_OPS (ann);
397 for (i = 0; i < NUM_DEFS (defs); i++)
398 rename_def_op (DEF_OP_PTR (defs, i), stmt);
400 vuses = VUSE_OPS (ann);
401 for (i = 0; i < NUM_VUSES (vuses); i++)
402 rename_use_op (VUSE_OP_PTR (vuses, i));
404 v_may_defs = V_MAY_DEF_OPS (ann);
405 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
407 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
408 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
411 v_must_defs = V_MUST_DEF_OPS (ann);
412 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
414 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
415 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
419 FOR_EACH_EDGE (e, ei, bb->succs)
421 if (!flow_bb_inside_loop_p (loop, e->dest))
422 continue;
423 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
424 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
429 /* Releases the structures holding the new ssa names. */
431 static void
432 free_new_names (bitmap definitions)
434 unsigned ver;
435 bitmap_iterator bi;
437 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
439 tree def = ssa_name (ver);
441 if (SSA_NAME_AUX (def))
443 free (SSA_NAME_AUX (def));
444 SSA_NAME_AUX (def) = NULL;
450 /* Renames variables in new generated LOOP. */
452 static void
453 rename_variables_in_loop (struct loop *loop)
455 unsigned i;
456 basic_block *bbs;
458 bbs = get_loop_body (loop);
460 for (i = 0; i < loop->num_nodes; i++)
461 rename_variables_in_bb (bbs[i]);
463 free (bbs);
467 /* Update the PHI nodes of NEW_LOOP.
469 NEW_LOOP is a duplicate of ORIG_LOOP.
470 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
471 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
472 executes before it. */
474 static void
475 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
476 struct loop *new_loop, bool after)
478 tree *new_name_ptr, new_ssa_name;
479 tree phi_new, phi_orig;
480 tree def;
481 edge orig_loop_latch = loop_latch_edge (orig_loop);
482 edge orig_entry_e = loop_preheader_edge (orig_loop);
483 edge new_loop_exit_e = new_loop->exit_edges[0];
484 edge new_loop_entry_e = loop_preheader_edge (new_loop);
485 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
488 step 1. For each loop-header-phi:
489 Add the first phi argument for the phi in NEW_LOOP
490 (the one associated with the entry of NEW_LOOP)
492 step 2. For each loop-header-phi:
493 Add the second phi argument for the phi in NEW_LOOP
494 (the one associated with the latch of NEW_LOOP)
496 step 3. Update the phis in the successor block of NEW_LOOP.
498 case 1: NEW_LOOP was placed before ORIG_LOOP:
499 The successor block of NEW_LOOP is the header of ORIG_LOOP.
500 Updating the phis in the successor block can therefore be done
501 along with the scanning of the loop header phis, because the
502 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
503 phi nodes, organized in the same order.
505 case 2: NEW_LOOP was placed after ORIG_LOOP:
506 The successor block of NEW_LOOP is the original exit block of
507 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
508 We postpone updating these phis to a later stage (when
509 loop guards are added).
513 /* Scan the phis in the headers of the old and new loops
514 (they are organized in exactly the same order). */
516 for (phi_new = phi_nodes (new_loop->header),
517 phi_orig = phi_nodes (orig_loop->header);
518 phi_new && phi_orig;
519 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
521 /* step 1. */
522 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
523 add_phi_arg (phi_new, def, new_loop_entry_e);
525 /* step 2. */
526 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
527 if (TREE_CODE (def) != SSA_NAME)
528 continue;
530 new_name_ptr = SSA_NAME_AUX (def);
531 if (!new_name_ptr)
532 /* Something defined outside of the loop. */
533 continue;
535 /* An ordinary ssa name defined in the loop. */
536 new_ssa_name = *new_name_ptr;
537 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
539 /* step 3 (case 1). */
540 if (!after)
542 gcc_assert (new_loop_exit_e == orig_entry_e);
543 SET_PHI_ARG_DEF (phi_orig,
544 new_loop_exit_e->dest_idx,
545 new_ssa_name);
551 /* Update PHI nodes for a guard of the LOOP.
553 Input:
554 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
555 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
556 originates from the guard-bb, skips LOOP and reaches the (unique) exit
557 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
558 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
559 LOOP header) before the guard code was added, and now it became a merge
560 point of two paths - the path that ends with the LOOP exit-edge, and
561 the path that ends with GUARD_EDGE.
563 This function creates and updates the relevant phi nodes to account for
564 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
565 1. Create phi nodes at NEW_MERGE_BB.
566 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
567 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
568 was added:
570 ===> The CFG before the guard-code was added:
571 LOOP_header_bb:
572 if (exit_loop) goto update_bb : LOOP_header_bb
573 update_bb:
575 ==> The CFG after the guard-code was added:
576 guard_bb:
577 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
578 LOOP_header_bb:
579 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
580 new_merge_bb:
581 goto update_bb
582 update_bb:
584 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
585 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
586 organized in the same order.
587 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
588 loop exit phis.
590 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
591 "original" loop). FALSE if LOOP is an original loop (not a newly
592 created copy). The SSA_NAME_AUX fields of the defs in the original
593 loop are the corresponding new ssa-names used in the new duplicated
594 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
595 nodes in UPDATE_BB takes the original ssa-name, and which takes the
596 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
597 the LOOP-exit-edge takes the new-name, and the phi-arg that is
598 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
599 FALSE, it's the other way around.
602 static void
603 slpeel_update_phi_nodes_for_guard (edge guard_edge,
604 struct loop *loop,
605 bool entry_phis,
606 bool is_new_loop)
608 tree orig_phi, new_phi, update_phi;
609 tree guard_arg, loop_arg;
610 basic_block new_merge_bb = guard_edge->dest;
611 edge e = EDGE_SUCC (new_merge_bb, 0);
612 basic_block update_bb = e->dest;
613 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
615 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
616 orig_phi && update_phi;
617 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
619 /* 1. Generate new phi node in NEW_MERGE_BB: */
620 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
621 new_merge_bb);
623 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
624 of LOOP. Set the two phi args in NEW_PHI for these edges: */
625 if (entry_phis)
627 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
628 EDGE_SUCC (loop->latch, 0));
629 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
631 else /* exit phis */
633 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
634 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
635 tree new_name;
637 if (new_name_ptr)
638 new_name = *new_name_ptr;
639 else
640 /* Something defined outside of the loop */
641 new_name = orig_def;
643 if (is_new_loop)
645 guard_arg = orig_def;
646 loop_arg = new_name;
648 else
650 guard_arg = new_name;
651 loop_arg = orig_def;
654 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
655 add_phi_arg (new_phi, guard_arg, guard_edge);
657 /* 3. Update phi in successor block. */
658 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
659 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
660 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
663 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
667 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
668 that starts at zero, increases by one and its limit is NITERS.
670 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
672 static void
673 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
675 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
676 tree orig_cond;
677 edge exit_edge = loop->exit_edges[0];
678 block_stmt_iterator loop_cond_bsi;
679 block_stmt_iterator incr_bsi;
680 bool insert_after;
681 tree begin_label = tree_block_label (loop->latch);
682 tree exit_label = tree_block_label (loop->single_exit->dest);
683 tree init = build_int_cst (TREE_TYPE (niters), 0);
684 tree step = build_int_cst (TREE_TYPE (niters), 1);
685 tree then_label;
686 tree else_label;
687 LOC loop_loc;
689 orig_cond = get_loop_exit_condition (loop);
690 #ifdef ENABLE_CHECKING
691 gcc_assert (orig_cond);
692 #endif
693 loop_cond_bsi = bsi_for_stmt (orig_cond);
695 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
696 create_iv (init, step, NULL_TREE, loop,
697 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
699 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
701 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
702 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
703 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
705 else /* 'then' edge loops back. */
707 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
708 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
709 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
712 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
713 then_label, else_label);
714 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
716 /* Remove old loop exit test: */
717 bsi_remove (&loop_cond_bsi);
719 loop_loc = find_loop_location (loop);
720 if (dump_file && (dump_flags & TDF_DETAILS))
722 if (loop_loc != UNKNOWN_LOC)
723 fprintf (dump_file, "\nloop at %s:%d: ",
724 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
725 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
728 loop->nb_iterations = niters;
732 /* Given LOOP this function generates a new copy of it and puts it
733 on E which is either the entry or exit of LOOP. */
735 static struct loop *
736 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
737 edge e)
739 struct loop *new_loop;
740 basic_block *new_bbs, *bbs;
741 bool at_exit;
742 bool was_imm_dom;
743 basic_block exit_dest;
744 tree phi, phi_arg;
746 at_exit = (e == loop->exit_edges[0]);
747 if (!at_exit && e != loop_preheader_edge (loop))
748 return NULL;
750 bbs = get_loop_body (loop);
752 /* Check whether duplication is possible. */
753 if (!can_copy_bbs_p (bbs, loop->num_nodes))
755 free (bbs);
756 return NULL;
759 /* Generate new loop structure. */
760 new_loop = duplicate_loop (loops, loop, loop->outer);
761 if (!new_loop)
763 free (bbs);
764 return NULL;
767 exit_dest = loop->exit_edges[0]->dest;
768 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
769 exit_dest) == loop->header ?
770 true : false);
772 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
774 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
776 /* Duplicating phi args at exit bbs as coming
777 also from exit of duplicated loop. */
778 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
780 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
781 if (phi_arg)
783 edge new_loop_exit_edge;
785 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
786 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
787 else
788 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
790 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
794 if (at_exit) /* Add the loop copy at exit. */
796 redirect_edge_and_branch_force (e, new_loop->header);
797 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
798 if (was_imm_dom)
799 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
801 else /* Add the copy at entry. */
803 edge new_exit_e;
804 edge entry_e = loop_preheader_edge (loop);
805 basic_block preheader = entry_e->src;
807 if (!flow_bb_inside_loop_p (new_loop,
808 EDGE_SUCC (new_loop->header, 0)->dest))
809 new_exit_e = EDGE_SUCC (new_loop->header, 0);
810 else
811 new_exit_e = EDGE_SUCC (new_loop->header, 1);
813 redirect_edge_and_branch_force (new_exit_e, loop->header);
814 set_immediate_dominator (CDI_DOMINATORS, loop->header,
815 new_exit_e->src);
817 /* We have to add phi args to the loop->header here as coming
818 from new_exit_e edge. */
819 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
821 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
822 if (phi_arg)
823 add_phi_arg (phi, phi_arg, new_exit_e);
826 redirect_edge_and_branch_force (entry_e, new_loop->header);
827 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
830 flow_loop_scan (new_loop, LOOP_ALL);
831 flow_loop_scan (loop, LOOP_ALL);
832 free (new_bbs);
833 free (bbs);
835 return new_loop;
839 /* Given the condition statement COND, put it as the last statement
840 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
841 Assumes that this is the single exit of the guarded loop.
842 Returns the skip edge. */
844 static edge
845 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
846 basic_block dom_bb)
848 block_stmt_iterator bsi;
849 edge new_e, enter_e;
850 tree cond_stmt, then_label, else_label;
852 enter_e = EDGE_SUCC (guard_bb, 0);
853 enter_e->flags &= ~EDGE_FALLTHRU;
854 enter_e->flags |= EDGE_FALSE_VALUE;
855 bsi = bsi_last (guard_bb);
857 then_label = build1 (GOTO_EXPR, void_type_node,
858 tree_block_label (exit_bb));
859 else_label = build1 (GOTO_EXPR, void_type_node,
860 tree_block_label (enter_e->dest));
861 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
862 then_label, else_label);
863 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
864 /* Add new edge to connect entry block to the second loop. */
865 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
866 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
867 return new_e;
871 /* This function verifies that the following restrictions apply to LOOP:
872 (1) it is innermost
873 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
874 (3) it is single entry, single exit
875 (4) its exit condition is the last stmt in the header
876 (5) E is the entry/exit edge of LOOP.
879 static bool
880 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
882 edge exit_e = loop->exit_edges [0];
883 edge entry_e = loop_preheader_edge (loop);
884 tree orig_cond = get_loop_exit_condition (loop);
885 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
887 if (any_marked_for_rewrite_p ())
888 return false;
890 if (loop->inner
891 /* All loops have an outer scope; the only case loop->outer is NULL is for
892 the function itself. */
893 || !loop->outer
894 || loop->num_nodes != 2
895 || !empty_block_p (loop->latch)
896 || loop->num_exits != 1
897 || loop->num_entries != 1
898 /* Verify that new loop exit condition can be trivially modified. */
899 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
900 || (e != exit_e && e != entry_e))
901 return false;
903 return true;
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG shape produced by peeling: FIRST_LOOP's exit block
   must be the guard block that selects between SECOND_LOOP's preheader and
   the block after SECOND_LOOP.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
  basic_block loop2_entry_bb = second_loop->pre_header;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore has
     two successors - one is the preheader of second_loop, and the other is
     a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the
     preheader of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src
                         == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.  */

struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  edge exit_e = loop->exit_edges [0];
  LOC loop_loc;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      /* Duplication failed - report it (at the estimated loop location,
         if one can be found) and give up.  */
      loop_loc = find_loop_location (loop);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\n%s:%d: note: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
        }
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* Give the copied loop fresh SSA names and fix up its phis.  */
  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard that controls whether the first loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
  bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition =
    build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
                                     first_loop == new_loop);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_between_loops, first_loop->outer);
  bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
                                     second_loop == new_loop);

  /* Flow loop scan does not update loop->single_exit field.  */
  first_loop->single_exit = first_loop->exit_edges[0];
  second_loop->single_exit = second_loop->exit_edges[0];

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  /* Release the renaming bookkeeping allocated above.  */
  free_new_names (definitions);
  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if it succeeds and UNKNOWN_LOC otherwise.  */

static LOC
find_loop_location (struct loop *loop)
{
  tree node = NULL_TREE;
  basic_block bb;
  block_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOC;

  /* Prefer the location of the loop's exit condition, when it has one.  */
  node = get_loop_exit_condition (loop);

  if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    return EXPR_LOC (node);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location: use the first stmt with
     a location in the loop header.  */

  if (!loop->header)
    return UNKNOWN_LOC;

  bb = loop->header;

  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
        return EXPR_LOC (node);
    }

  return UNKNOWN_LOC;
}
1181 /*************************************************************************
1182 Vectorization Debug Information.
1183 *************************************************************************/
1185 /* Function vect_set_verbosity_level.
1187 Called from toplev.c upon detection of the
1188 -ftree-vectorizer-verbose=N option. */
1190 void
1191 vect_set_verbosity_level (const char *val)
1193 unsigned int vl;
1195 vl = atoi (val);
1196 if (vl < MAX_VERBOSITY_LEVEL)
1197 vect_verbosity_level = vl;
1198 else
1199 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1203 /* Function vect_set_dump_settings.
1205 Fix the verbosity level of the vectorizer if the
1206 requested level was not set explicitly using the flag
1207 -ftree-vectorizer-verbose=N.
1208 Decide where to print the debugging information (dump_file/stderr).
1209 If the user defined the verbosity level, but there is no dump file,
1210 print to stderr, otherwise print to the dump file. */
1212 static void
1213 vect_set_dump_settings (void)
1215 vect_dump = dump_file;
1217 /* Check if the verbosity level was defined by the user: */
1218 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1220 /* If there is no dump file, print to stderr. */
1221 if (!dump_file)
1222 vect_dump = stderr;
1223 return;
1226 /* User didn't specify verbosity level: */
1227 if (dump_file && (dump_flags & TDF_DETAILS))
1228 vect_verbosity_level = REPORT_DETAILS;
1229 else if (dump_file && (dump_flags & TDF_STATS))
1230 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1231 else
1232 vect_verbosity_level = REPORT_NONE;
1234 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
/* Function vect_print_dump_info.

   For vectorization debug dumps: if verbosity level VL is enabled, print a
   "file:line: note: " prefix to VECT_DUMP and return true so the caller
   can append its message; otherwise return false.  */

static bool
vect_print_dump_info (enum verbosity_levels vl, LOC loc)
{
  if (vl > vect_verbosity_level)
    return false;

  /* With no known loop location, fall back to the location of the current
     function's declaration.  */
  if (loc == UNKNOWN_LOC)
    fprintf (vect_dump, "\n%s:%d: note: ",
             DECL_SOURCE_FILE (current_function_decl),
             DECL_SOURCE_LINE (current_function_decl));
  else
    fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));


  return true;
}
1261 /* Here the proper Vectorizer starts. */
1263 /*************************************************************************
1264 Vectorization Utilities.
1265 *************************************************************************/
1267 /* Function new_stmt_vec_info.
1269 Create and initialize a new stmt_vec_info struct for STMT. */
1271 stmt_vec_info
1272 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1274 stmt_vec_info res;
1275 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1277 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1278 STMT_VINFO_STMT (res) = stmt;
1279 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1280 STMT_VINFO_RELEVANT_P (res) = 0;
1281 STMT_VINFO_VECTYPE (res) = NULL;
1282 STMT_VINFO_VEC_STMT (res) = NULL;
1283 STMT_VINFO_DATA_REF (res) = NULL;
1284 STMT_VINFO_MEMTAG (res) = NULL;
1285 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1286 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1287 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1288 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1289 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1291 return res;
1295 /* Function new_loop_vec_info.
1297 Create and initialize a new loop_vec_info struct for LOOP, as well as
1298 stmt_vec_info structs for all the stmts in LOOP. */
1300 loop_vec_info
1301 new_loop_vec_info (struct loop *loop)
1303 loop_vec_info res;
1304 basic_block *bbs;
1305 block_stmt_iterator si;
1306 unsigned int i;
1308 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1310 bbs = get_loop_body (loop);
1312 /* Create stmt_info for all stmts in the loop. */
1313 for (i = 0; i < loop->num_nodes; i++)
1315 basic_block bb = bbs[i];
1316 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1318 tree stmt = bsi_stmt (si);
1319 stmt_ann_t ann;
1321 get_stmt_operands (stmt);
1322 ann = stmt_ann (stmt);
1323 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1327 LOOP_VINFO_LOOP (res) = loop;
1328 LOOP_VINFO_BBS (res) = bbs;
1329 LOOP_VINFO_EXIT_COND (res) = NULL;
1330 LOOP_VINFO_NITERS (res) = NULL;
1331 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1332 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1333 LOOP_VINFO_VECT_FACTOR (res) = 0;
1334 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1335 "loop_write_datarefs");
1336 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1337 "loop_read_datarefs");
1338 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1339 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1341 return res;
1345 /* Function destroy_loop_vec_info.
1347 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1348 stmts in the loop. */
1350 void
1351 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1353 struct loop *loop;
1354 basic_block *bbs;
1355 int nbbs;
1356 block_stmt_iterator si;
1357 int j;
1359 if (!loop_vinfo)
1360 return;
1362 loop = LOOP_VINFO_LOOP (loop_vinfo);
1364 bbs = LOOP_VINFO_BBS (loop_vinfo);
1365 nbbs = loop->num_nodes;
1367 for (j = 0; j < nbbs; j++)
1369 basic_block bb = bbs[j];
1370 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1372 tree stmt = bsi_stmt (si);
1373 stmt_ann_t ann = stmt_ann (stmt);
1374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1375 free (stmt_info);
1376 set_stmt_info (ann, NULL);
1380 free (LOOP_VINFO_BBS (loop_vinfo));
1381 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1382 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1384 free (loop_vinfo);
/* Function vect_get_ptr_offset

   Compute the OFFSET modulo vector-type alignment of pointer REF in bits.

   FORNOW: unimplemented - always returns NULL_TREE, i.e. no alignment
   information could be derived for REF.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}
1402 /* Function vect_strip_conversions
1404 Strip conversions that don't narrow the mode. */
1406 static tree
1407 vect_strip_conversion (tree expr)
1409 tree to, ti, oprnd0;
1411 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1413 to = TREE_TYPE (expr);
1414 oprnd0 = TREE_OPERAND (expr, 0);
1415 ti = TREE_TYPE (oprnd0);
1417 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1418 return NULL_TREE;
1419 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1420 return NULL_TREE;
1422 expr = oprnd0;
1424 return expr;
/* Function vect_analyze_offset_expr

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with initial_condition of the corresponding access_fn in the loop.
   E.g.,
      for i
         for (j = 3; j < N; j++)
            a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  An INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (We perform MISALIGN modulo
   VECTYPE_ALIGNMENT computation in the caller of this function).

   STEP is an evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE, if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */

static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = ssize_int (0);
  tree right_offset = ssize_int (0);
  tree left_misalign = ssize_int (0);
  tree right_misalign = ssize_int (0);
  tree left_step = ssize_int (0);
  tree right_step = ssize_int (0);
  enum tree_code code;
  tree init, evolution;

  /* Defaults in case the analysis fails part-way through.  */
  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return true;
    }

  /* 2. Variable.  Try to substitute with initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may be not loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
           initial_condition is D, but it depends on i - loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (ssizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (ssizetype, init);

      /* A missing evolution means the variable does not change in this
         loop, i.e. the step is zero.  */
      *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "Not binary expression ");
          print_generic_expr (vect_dump, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  /* Analyze both operands; either failure aborts the whole analysis.  */
  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET can be not constant.  For example, for arrays of
           variable sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains variables that can't be substituted
             with constants, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                         fold_convert (ssizetype,
                                                       vectype_alignment))))
            *misalign = ssize_int (0);
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (ssizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}
/* Function vect_get_base_and_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
   STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with initial_conditions of access_functions of variables,
   modulo alignment, and STEP is the evolution of the DR_REF in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_offset (struct data_reference *dr,
                          tree expr,
                          tree vectype,
                          loop_vec_info loop_vinfo,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step,
                          bool *base_aligned_p)
{
  /* Contributions of the current recursion level, combined with the
     recursive results before returning.  */
  tree this_offset = ssize_int (0);
  tree this_misalign = ssize_int (0);
  tree this_step = ssize_int (0);
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  enum tree_code code = TREE_CODE (expr);
  HOST_WIDE_INT pbitsize;
  HOST_WIDE_INT pbitpos;
  tree poffset;
  enum machine_mode pmode;
  int punsignedp, pvolatilep;
  tree bit_pos_in_bytes;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
    case PARM_DECL:
      *initial_offset = ssize_int (0);
      *step = ssize_int (0);
      *misalign = ssize_int (0);
      /* A decl is aligned if its declared alignment is at least that of
         the vector type.  */
      if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
        *base_aligned_p = true;
      return expr;

    case SSA_NAME:
      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          /* Pointed-to type is less aligned than the vector type - try to
             derive the misalignment from pointer alignment info.  */
          base = vect_get_ptr_offset (expr, vectype, misalign);
          if (base)
            *base_aligned_p = true;
        }
      else
        {
          *base_aligned_p = true;
          *misalign = ssize_int (0);
        }
      *initial_offset = ssize_int (0);
      *step = ssize_int (0);
      return expr;

    case INTEGER_CST:
      *initial_offset = fold_convert (ssizetype, expr);
      *misalign = fold_convert (ssizetype, expr);
      *step = ssize_int (0);
      return expr;

    /* These cases continue the recursion:  */
    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* In case we have a PLUS_EXPR of the form
         (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
         This is verified in vect_get_memtag_and_dr.  */
      base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
                                       &this_offset, &this_misalign,
                                       &this_step, base_aligned_p);
      /* Offset was already computed in vect_analyze_pointer_ref_access.  */
      this_offset = ssize_int (0);

      if (!base)
        this_misalign = NULL_TREE;
      else
        /* Apply the sign of the PLUS/MINUS to oprnd1's misalignment.  */
        this_misalign = size_binop (TREE_CODE (expr), ssize_int (0),
                                    this_misalign);
      next_ref = oprnd0;
      break;

    default:
      if (!handled_component_p (expr))
        /* Unsupported expression.  */
        return NULL_TREE;

      /* Find the base and the offset from it.  */
      next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
                                      &pmode, &punsignedp, &pvolatilep, false);
      if (!next_ref)
        return NULL_TREE;

      if (poffset
          && !vect_analyze_offset_expr (poffset, loop,
                                        TYPE_SIZE_UNIT (vectype),
                                        &this_offset, &this_misalign,
                                        &this_step))
        {
          /* Failed to compute offset or step.  */
          *step = NULL_TREE;
          *initial_offset = NULL_TREE;
          *misalign = NULL_TREE;
          return NULL_TREE;
        }

      /* Add bit position to OFFSET and MISALIGN.  */

      bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
      /* Check that there is no remainder in bits.  */
      if (pbitpos%BITS_PER_UNIT)
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "bit offset alignment.");
          return NULL_TREE;
        }
      this_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes,
                                fold_convert (ssizetype, this_offset));
      if (this_misalign)
        this_misalign = size_binop (PLUS_EXPR, this_misalign,
                                    bit_pos_in_bytes);

      /* Continue the recursion to refine the base (get_inner_reference
         returns &a for &a[i], and not a).  */
      break;
    }

  base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
                                   initial_offset, misalign, step,
                                   base_aligned_p);
  if (base)
    {
      /* Combine the results.  */
      if (this_misalign && *misalign)
        *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
      else
        *misalign = NULL_TREE;

      *step = size_binop (PLUS_EXPR, *step, this_step);

      *initial_offset = size_binop (PLUS_EXPR, *initial_offset, this_offset);

      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          print_generic_expr (vect_dump, expr, TDF_SLIM);
          fprintf (vect_dump, "\n --> total offset for ref: ");
          print_generic_expr (vect_dump, *initial_offset, TDF_SLIM);
          fprintf (vect_dump, "\n --> total misalign for ref: ");
          print_generic_expr (vect_dump, *misalign, TDF_SLIM);
          fprintf (vect_dump, "\n --> total step for ref: ");
          print_generic_expr (vect_dump, *step, TDF_SLIM);
        }
    }
  return base;
}
1826 /* Function vect_force_dr_alignment_p.
1828 Returns whether the alignment of a DECL can be forced to be aligned
1829 on ALIGNMENT bit boundary. */
1831 static bool
1832 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1834 if (TREE_CODE (decl) != VAR_DECL)
1835 return false;
1837 if (DECL_EXTERNAL (decl))
1838 return false;
1840 if (TREE_ASM_WRITTEN (decl))
1841 return false;
1843 if (TREE_STATIC (decl))
1844 return (alignment <= MAX_OFILE_ALIGNMENT);
1845 else
1846 /* This is not 100% correct. The absolute correct stack alignment
1847 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1848 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1849 However, until someone implements forced stack alignment, SSE
1850 isn't really usable without this. */
1851 return (alignment <= PREFERRED_STACK_BOUNDARY);
1855 /* Function vect_get_new_vect_var.
1857 Returns a name for a new variable. The current naming scheme appends the
1858 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1859 the name of vectorizer generated variables, and appends that to NAME if
1860 provided. */
1862 static tree
1863 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1865 const char *prefix;
1866 int prefix_len;
1867 tree new_vect_var;
1869 if (var_kind == vect_simple_var)
1870 prefix = "vect_";
1871 else
1872 prefix = "vect_p";
1874 prefix_len = strlen (prefix);
1876 if (name)
1877 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1878 else
1879 new_vect_var = create_tmp_var (type, prefix);
1881 return new_vect_var;
/* Function vect_create_index_for_vector_ref.

   Create (and return) an index variable, along with it's update chain in the
   loop.  This variable will be used to access a memory location in a vector
   operation.

   Input:
   LOOP: The loop being vectorized.
   BSI: The block_stmt_iterator where STMT is.  Any new stmts created by this
        function can be added here, or in the loop pre-header.

   Output:
   Return an index that will be used to index a vector array.  It is expected
   that a pointer to the first vector will be used as the base address for the
   indexed reference.

   FORNOW: we are not trying to be efficient, just creating a new index each
   time from scratch.  At this time all vector references could use the same
   index.

   TODO: create only one index to be used by all vector references.  Record
   the index in the LOOP_VINFO the first time this procedure is called and
   return it on subsequent calls.  The increment of this index must be placed
   just before the conditional expression that ends the single block loop.  */

static tree
vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
{
  tree init, step;
  block_stmt_iterator incr_bsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree incr;

  /* It is assumed that the base pointer used for vectorized access contains
     the address of the first vector.  Therefore the index used for vectorized
     access must be initialized to zero and incremented by 1.  */

  init = integer_zero_node;
  step = integer_one_node;

  /* Build the induction variable at the canonical increment position for
     this loop.  */
  standard_iv_increment_position (loop, &incr_bsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
             &indx_before_incr, &indx_after_incr);

  /* Attach vectorizer stmt-info to the increment stmt that create_iv made,
     so later vectorizer passes over the loop can process it.  */
  incr = bsi_stmt (incr_bsi);
  get_stmt_operands (incr);
  set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));

  return indx_before_incr;
}
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  /* Unshared copies so gimplification below cannot corrupt shared trees.  */
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree vec_stmt;
  tree new_temp;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));

  if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
    /* After the analysis stage, we expect to get here only with RECORD_TYPE
       and ARRAY_TYPE.  */
    /* Add '&' to ref_base.  */
    data_ref_base = build_fold_addr_expr (data_ref_base);
  else
    {
      /* Create '(scalar_type*) base' for pointers.  */
      /* NOTE(review): these locals shadow the outer DEST/NEW_STMT/NEW_TEMP/
         VEC_STMT declarations above; the outer ones are used after this
         branch.  */
      tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      /* Gimplify the base address; statements produced are appended to
         NEW_STMT_LIST for the caller to insert.  */
      tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
      data_ref_base = new_temp;
    }

  /* Create base_offset  */
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_tmp_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      add_referenced_tmp_var (tmp);
      /* Scale OFFSET by the access step (bytes per iteration) before
         adding it to the initial offset.  */
      offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
                             STMT_VINFO_VECT_STEP (stmt_info)));
      base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                  base_offset, offset));
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset  */
  addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
                            base_offset));

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return new_temp;
}
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target, or NULL_TREE if no usable vector type exists.  */

static tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  /* A zero-size mode cannot be an element of a vector.  */
  if (nbytes == 0)
    return NULL_TREE;

  /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  /* The vector type is only useful if the target actually has a vector
     mode for it; otherwise it would be lowered to scalar code again.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
    {
      /* TODO: tree-complex.c sometimes can parallelize operations
         on generic vectors.  We can vectorize the loop in that case,
         but then we should re-run the lowering pass.  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
/* Function vect_align_data_ref.

   Handle misalignment of a memory access.

   FORNOW: Can't handle misaligned accesses.
   Make sure that the dataref is aligned.  */

static void
vect_align_data_ref (tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  /* FORNOW: can't handle misaligned accesses;
     all accesses expected to be aligned.  */
  gcc_assert (aligned_access_p (dr));
}
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt.  The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial addressed accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. Create a data-reference in the loop based on the new vector pointer vp,
      and using a new index variable 'idx' as follows:

      vp' = vp + update

      where if ONLY_INIT is true:
         update = zero
      and otherwise
         update = idx + vector_type_size

      Return the pointer vp'.


   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
                          tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;
  tree type, tmp, size;

  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create array_ref of type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, " vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, " vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer:  **/

  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_RESULT (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* Inserting on the preheader edge must not have required splitting it.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop_vinfo);

  /* Create: update = idx * vectype_size  */
  tmp = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (tmp);
  /* PTR_UPDATE holds the byte offset in a pointer-sized integer type.  */
  size = TYPE_SIZE (vect_ptr_type);
  type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
  ptr_update = create_tmp_var (type, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = TYPE_SIZE_UNIT (vectype);
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
  new_temp = make_ssa_name (tmp, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  vec_stmt = fold_convert (type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
2300 /* Function vect_create_destination_var.
2302 Create a new temporary of type VECTYPE. */
2304 static tree
2305 vect_create_destination_var (tree scalar_dest, tree vectype)
2307 tree vec_dest;
2308 const char *new_name;
2310 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2312 new_name = get_name (scalar_dest);
2313 if (!new_name)
2314 new_name = "var_";
2315 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2316 add_referenced_tmp_var (vec_dest);
2318 return vec_dest;
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will be
   used in the vectorization of STMT.  */

static tree
vect_init_vector (tree stmt, tree vector_var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree new_var;
  tree init_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
  add_referenced_tmp_var (new_var);

  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  TREE_OPERAND (init_stmt, 0) = new_temp;

  /* The initialization is loop-invariant, so emit it once on the loop
     preheader edge; the insertion must not have required edge splitting.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = TREE_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

static tree
vect_get_vec_def_for_operand (tree op, tree stmt)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb;
  tree vec_inv;
  tree t = NULL_TREE;
  tree def;
  int i;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  /** ===> Case 1: operand is a constant.  **/

  if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
    {
      /* Create 'vect_cst_ = {cst,cst,...,cst}'  */

      tree vec_cst;

      /* Build a tree with vector elements (a TREE_LIST, built back to
         front so element 0 ends up first).  */
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

      for (i = nunits - 1; i >= 0; --i)
        {
          t = tree_cons (NULL_TREE, op, t);
        }
      vec_cst = build_vector (vectype, t);
      return vect_init_vector (stmt, vec_cst);
    }

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/

  def_stmt = SSA_NAME_DEF_STMT (op);
  def_stmt_info = vinfo_for_stmt (def_stmt);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
      print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
    }


  /** ==> Case 2.1: operand is defined inside the loop.  **/

  if (def_stmt_info)
    {
      /* Get the def from the vectorized stmt.  */

      vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
      gcc_assert (vec_stmt);
      vec_oprnd = TREE_OPERAND (vec_stmt, 0);
      return vec_oprnd;
    }


  /** ==> Case 2.2: operand is defined by the loop-header phi-node -
                    it is a reduction/induction.  **/

  bb = bb_for_stmt (def_stmt);
  if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "reduction/induction - unsupported.");
      internal_error ("no support for reduction/induction"); /* FORNOW */
    }


  /** ==> Case 2.3: operand is defined outside the loop -
                    it is a loop invariant.  */

  switch (TREE_CODE (def_stmt))
    {
    case PHI_NODE:
      def = PHI_RESULT (def_stmt);
      break;
    case MODIFY_EXPR:
      def = TREE_OPERAND (def_stmt, 0);
      break;
    case NOP_EXPR:
      def = TREE_OPERAND (def_stmt, 0);
      gcc_assert (IS_EMPTY_STMT (def_stmt));
      /* NOTE(review): the assignment to DEF above is immediately
         overwritten here; for an empty def-stmt the operand itself
         is the invariant value.  */
      def = op;
      break;
    default:
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        {
          fprintf (vect_dump, "unsupported defining stmt: ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
      internal_error ("unsupported defining stmt");
    }

  /* Build a tree with vector elements.
     Create 'vec_inv = {inv,inv,..,inv}'  */

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "Create vector_inv.");

  for (i = nunits - 1; i >= 0; --i)
    {
      t = tree_cons (NULL_TREE, def, t);
    }

  vec_inv = build_constructor (vectype, t);
  return vect_init_vector (stmt, vec_inv);
}
/* Function vect_finish_stmt_generation.

   Insert VEC_STMT into the IL just before the scalar stmt being
   vectorized, and copy the scalar stmt's source location onto it.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
{
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

#ifdef ENABLE_CHECKING
  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));
#endif

  /* Propagate the scalar stmt's location so diagnostics/debug info point
     at the original source line.  */
#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCUS (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}
2527 /* Function vectorizable_assignment.
2529 Check if STMT performs an assignment (copy) that can be vectorized.
2530 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2531 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2532 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2534 static bool
2535 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2537 tree vec_dest;
2538 tree scalar_dest;
2539 tree op;
2540 tree vec_oprnd;
2541 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2542 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2543 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2544 tree new_temp;
2546 /* Is vectorizable assignment? */
2548 if (TREE_CODE (stmt) != MODIFY_EXPR)
2549 return false;
2551 scalar_dest = TREE_OPERAND (stmt, 0);
2552 if (TREE_CODE (scalar_dest) != SSA_NAME)
2553 return false;
2555 op = TREE_OPERAND (stmt, 1);
2556 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2558 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2559 fprintf (vect_dump, "use not simple.");
2560 return false;
2563 if (!vec_stmt) /* transformation not required. */
2565 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2566 return true;
2569 /** Transform. **/
2570 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2571 fprintf (vect_dump, "transform assignment.");
2573 /* Handle def. */
2574 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2576 /* Handle use. */
2577 op = TREE_OPERAND (stmt, 1);
2578 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2580 /* Arguments are ready. create the new vector stmt. */
2581 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2582 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2583 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2584 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2586 return true;
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must be "simple" (SSA names defined in the loop,
     invariants, or constants).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, NULL))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
        fprintf (vect_dump, "op not supported by target.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
    }

  /* Arguments are ready.  Create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
2703 /* Function vectorizable_store.
2705 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2706 can be vectorized.
2707 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2708 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2709 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2711 static bool
2712 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2714 tree scalar_dest;
2715 tree data_ref;
2716 tree op;
2717 tree vec_oprnd1;
2718 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2719 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2720 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2721 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2722 enum machine_mode vec_mode;
2723 tree dummy;
2724 enum dr_alignment_support alignment_support_cheme;
2726 /* Is vectorizable store? */
2728 if (TREE_CODE (stmt) != MODIFY_EXPR)
2729 return false;
2731 scalar_dest = TREE_OPERAND (stmt, 0);
2732 if (TREE_CODE (scalar_dest) != ARRAY_REF
2733 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2734 return false;
2736 op = TREE_OPERAND (stmt, 1);
2737 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2739 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2740 fprintf (vect_dump, "use not simple.");
2741 return false;
2744 vec_mode = TYPE_MODE (vectype);
2745 /* FORNOW. In some cases can vectorize even if data-type not supported
2746 (e.g. - array initialization with 0). */
2747 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2748 return false;
2750 if (!STMT_VINFO_DATA_REF (stmt_info))
2751 return false;
2754 if (!vec_stmt) /* transformation not required. */
2756 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2757 return true;
2760 /** Transform. **/
2762 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2763 fprintf (vect_dump, "transform store");
2765 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2766 gcc_assert (alignment_support_cheme);
2767 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2769 /* Handle use - get the vectorized def from the defining stmt. */
2770 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2772 /* Handle def. */
2773 /* FORNOW: make sure the data reference is aligned. */
2774 vect_align_data_ref (stmt);
2775 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2776 data_ref = build_fold_indirect_ref (data_ref);
2778 /* Arguments are ready. create the new vector stmt. */
2779 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2780 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2782 return true;
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* Create:
         p = initial_addr;
         indx = 0;
         loop {
           vec_dest = *(p);
           indx = indx + 1;
         }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
        data_ref = build_fold_indirect_ref (data_ref);
      else
        {
          /* Misaligned but target-supported: emit a MISALIGNED_INDIRECT_REF
             carrying the known misalignment (in bits).  */
          int mis = DR_MISALIGNMENT (dr);
          tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
          tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
          data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
        }
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         magic = have_builtin ? builtin_result : initial_address;
         indx = 0;
         loop {
           p2' = p2 + indx * vectype_size
           lsq = *(floor(p2'))
           vec_dest = realign_load (msq, lsq, magic)
           indx = indx + 1;
           msq = lsq;
         }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
                                           &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);


      /* <2> Create lsq = *(floor(p2')) in the loop  */
      offset = build_int_cst (integer_type_node,
                              GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);


      /* <3> */
      if (targetm.vectorize.builtin_mask_for_load)
        {
          /* Create permutation mask, if required, in loop preheader.  */
          tree builtin_decl;
          params = build_tree_list (NULL_TREE, init_addr);
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          builtin_decl = targetm.vectorize.builtin_mask_for_load ();
          new_stmt = build_function_call_expr (builtin_decl, params);
          new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          TREE_OPERAND (new_stmt, 0) = new_temp;
          new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
          magic = TREE_OPERAND (new_stmt, 0);

          /* Since we have just created a CALL_EXPR, we may need to
             rename call-clobbered variables.  */
          mark_call_clobbered_vars_to_rename ();
        }
      else
        {
          /* Use current address instead of init_addr for reduced reg
             pressure.  */
          magic = dataref_ptr;
        }


      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
2989 /* Function vect_supportable_dr_alignment
2991 Return whether the data reference DR is supported with respect to its
2992 alignment. */
2994 static enum dr_alignment_support
2995 vect_supportable_dr_alignment (struct data_reference *dr)
2997 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2998 enum machine_mode mode = (int) TYPE_MODE (vectype);
3000 if (aligned_access_p (dr))
3001 return dr_aligned;
3003 /* Possibly unaligned access. */
3005 if (DR_IS_READ (dr))
3007 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
3008 && (!targetm.vectorize.builtin_mask_for_load
3009 || targetm.vectorize.builtin_mask_for_load ()))
3010 return dr_unaligned_software_pipeline;
3012 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
3013 /* Can't software pipeline the loads, but can at least do them. */
3014 return dr_unaligned_supported;
3017 /* Unsupported. */
3018 return dr_unaligned_unsupported;
3022 /* Function vect_transform_stmt.
3024 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3026 static bool
3027 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
3029 bool is_store = false;
3030 tree vec_stmt = NULL_TREE;
3031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3032 bool done;
3034 switch (STMT_VINFO_TYPE (stmt_info))
3036 case op_vec_info_type:
3037 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3038 gcc_assert (done);
3039 break;
3041 case assignment_vec_info_type:
3042 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3043 gcc_assert (done);
3044 break;
3046 case load_vec_info_type:
3047 done = vectorizable_load (stmt, bsi, &vec_stmt);
3048 gcc_assert (done);
3049 break;
3051 case store_vec_info_type:
3052 done = vectorizable_store (stmt, bsi, &vec_stmt);
3053 gcc_assert (done);
3054 is_store = true;
3055 break;
3056 default:
3057 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3058 fprintf (vect_dump, "stmt not supported.");
3059 gcc_unreachable ();
3062 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3064 return is_store;
3068 /* This function builds ni_name = number of iterations loop executes
3069 on the loop preheader. */
3071 static tree
3072 vect_build_loop_niters (loop_vec_info loop_vinfo)
3074 tree ni_name, stmt, var;
3075 edge pe;
3076 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3077 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3079 var = create_tmp_var (TREE_TYPE (ni), "niters");
3080 add_referenced_tmp_var (var);
3081 ni_name = force_gimple_operand (ni, &stmt, false, var);
3083 pe = loop_preheader_edge (loop);
3084 if (stmt)
3086 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3087 gcc_assert (!new_bb);
3090 return ni_name;
3094 /* This function generates the following statements:
3096 ni_name = number of iterations loop executes
3097 ratio = ni_name / vf
3098 ratio_mult_vf_name = ratio * vf
3100 and places them at the loop preheader edge. */
3102 static void
3103 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3104 tree *ni_name_ptr,
3105 tree *ratio_mult_vf_name_ptr,
3106 tree *ratio_name_ptr)
3109 edge pe;
3110 basic_block new_bb;
3111 tree stmt, ni_name;
3112 tree var;
3113 tree ratio_name;
3114 tree ratio_mult_vf_name;
3115 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3116 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3117 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3118 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3120 pe = loop_preheader_edge (loop);
3122 /* Generate temporary variable that contains
3123 number of iterations loop executes. */
3125 ni_name = vect_build_loop_niters (loop_vinfo);
3127 /* Create: ratio = ni >> log2(vf) */
3129 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3130 add_referenced_tmp_var (var);
3131 ratio_name = make_ssa_name (var, NULL_TREE);
3132 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3133 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3134 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3136 pe = loop_preheader_edge (loop);
3137 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3138 gcc_assert (!new_bb);
3140 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3142 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3143 add_referenced_tmp_var (var);
3144 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3145 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3146 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3147 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3149 pe = loop_preheader_edge (loop);
3150 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3151 gcc_assert (!new_bb);
3153 *ni_name_ptr = ni_name;
3154 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3155 *ratio_name_ptr = ratio_name;
3157 return;
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->exit_edges[0]->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);

  /* Walk the header phis and the update_bb phis in lockstep - this
     relies on Assumption 2 above (same order in both chains).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
         unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                               loop->num));

      /* ni = init + niters * step: the value the IV takes after NITERS
         iterations of the original (scalar) loop.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                  build2 (MULT_EXPR, TREE_TYPE (niters),
                       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb: redirect the phi arg on
         UPDATE_E to the advanced IV value computed above.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
                  PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
3271 /* Function vect_do_peeling_for_loop_bound
3273 Peel the last iterations of the loop represented by LOOP_VINFO.
3274 The peeled iterations form a new epilog loop. Given that the loop now
3275 iterates NITERS times, the new epilog loop iterates
3276 NITERS % VECTORIZATION_FACTOR times.
3278 The original loop will later be made to iterate
3279 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3281 static void
3282 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3283 struct loops *loops)
3286 tree ni_name, ratio_mult_vf_name;
3287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3288 struct loop *new_loop;
3289 edge update_e;
3290 #ifdef ENABLE_CHECKING
3291 int loop_num;
3292 #endif
3294 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3295 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
3297 /* Generate the following variables on the preheader of original loop:
3299 ni_name = number of iteration the original loop executes
3300 ratio = ni_name / vf
3301 ratio_mult_vf_name = ratio * vf */
3302 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3303 &ratio_mult_vf_name, ratio);
3305 /* Update loop info. */
3306 loop->pre_header = loop_preheader_edge (loop)->src;
3307 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3309 #ifdef ENABLE_CHECKING
3310 loop_num = loop->num;
3311 #endif
3312 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3313 ratio_mult_vf_name, ni_name, false);
3314 #ifdef ENABLE_CHECKING
3315 gcc_assert (new_loop);
3316 gcc_assert (loop_num == loop->num);
3317 slpeel_verify_cfg_after_peeling (loop, new_loop);
3318 #endif
3320 /* A guard that controls whether the new_loop is to be executed or skipped
3321 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3322 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3323 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3324 is on the path where the LOOP IVs are used and need to be updated. */
3326 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3327 update_e = EDGE_PRED (new_loop->pre_header, 0);
3328 else
3329 update_e = EDGE_PRED (new_loop->pre_header, 1);
3331 /* Update IVs of original loop as if they were advanced
3332 by ratio_mult_vf_name steps. */
3333 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
3335 /* After peeling we have to reset scalar evolution analyzer. */
3336 scev_reset ();
3338 return;
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the first data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   compute address misalignment in bytes:
   addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
        whose type is the inner type of the vectype)  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector-type alignment in bytes.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree elem_misalign;
  tree byte_misalign;
  tree new_stmts = NULL_TREE;
  /* Address of the first access of DR, computed as new gimple stmts
     collected in NEW_STMTS.  */
  tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
  tree ptr_type = TREE_TYPE (start_addr);
  tree size = TYPE_SIZE (ptr_type);
  /* Unsigned integer type of pointer width, for the bit arithmetic.  */
  tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
  tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
  tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
  tree niters_type = TREE_TYPE (loop_niters);
  /* log2 of the element size, derived as vectype_align/vf - assumes the
     vector alignment in bytes equals vf elements; TODO confirm.  */
  tree elem_size_log =
        build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
  tree vf_tree = build_int_cst (unsigned_type_node, vf);

  /* Materialize START_ADDR on the preheader edge.
     NOTE(review): NEW_STMTS may still be NULL_TREE here if no stmts were
     needed - presumably bsi_insert_on_edge_immediate tolerates that;
     confirm (the other call sites in this file guard with "if (stmt)").  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
  gcc_assert (!new_bb);

  /* Create:  byte_misalign = addr & (vectype_size - 1)  */
  byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

  /* Create:  elem_misalign = byte_misalign / element_size  */
  elem_misalign =
        build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);

  /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
  iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
  iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
  iters = fold_convert (niters_type, iters);

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  pe = loop_preheader_edge (loop);
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
3428 /* Function vect_update_inits_of_dr
3430 NITERS iterations were peeled from LOOP. DR represents a data reference
3431 in LOOP. This function updates the information recorded in DR to
3432 account for the fact that the first NITERS iterations had already been
3433 executed. Specifically, it updates the OFFSET field of stmt_info. */
3435 static void
3436 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3438 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3439 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3441 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3442 STMT_VINFO_VECT_STEP (stmt_info)));
3443 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3444 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3448 /* Function vect_update_inits_of_drs
3450 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3451 This function updates the information recorded for the data references in
3452 the loop to account for the fact that the first NITERS iterations had
3453 already been executed. Specifically, it updates the initial_condition of the
3454 access_function of all the data_references in the loop. */
3456 static void
3457 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3459 unsigned int i;
3460 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3461 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3463 if (vect_dump && (dump_flags & TDF_DETAILS))
3464 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
3466 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3468 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3469 vect_update_inits_of_dr (dr, niters);
3472 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3474 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3475 vect_update_inits_of_dr (dr, niters);
3480 /* Function vect_do_peeling_for_alignment
3482 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3483 'niters' is set to the misalignment of one of the data references in the
3484 loop, thereby forcing it to refer to an aligned location at the beginning
3485 of the execution of this loop. The data reference for which we are
3486 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3488 static void
3489 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3491 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3492 tree niters_of_prolog_loop, ni_name;
3493 tree n_iters;
3494 struct loop *new_loop;
3496 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3497 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
3499 ni_name = vect_build_loop_niters (loop_vinfo);
3500 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3502 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3503 new_loop =
3504 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3505 niters_of_prolog_loop, ni_name, true);
3506 #ifdef ENABLE_CHECKING
3507 gcc_assert (new_loop);
3508 slpeel_verify_cfg_after_peeling (new_loop, loop);
3509 #endif
3511 /* Update number of times loop executes. */
3512 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3513 LOOP_VINFO_NITERS (loop_vinfo) =
3514 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3516 /* Update the init conditions of the access functions of all data refs. */
3517 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3519 /* After peeling we have to reset scalar evolution analyzer. */
3520 scev_reset ();
3522 return;
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

static void
vect_transform_loop (loop_vec_info loop_vinfo,
                     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* Known iteration count that divides evenly: RATIO is just n/VF.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                  LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Note: the iterator is advanced manually inside the loop, because
         vectorizing a store removes the scalar stmt at SI.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info;
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (stmt);
          gcc_assert (stmt_info);
          /* Irrelevant stmts (e.g. loop control) stay scalar.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              bsi_next (&si);
              continue;
            }
#ifdef ENABLE_CHECKING
          /* FORNOW: Verify that all stmts operate on the same number of
                     units and no inner unrolling is necessary.  */
          gcc_assert
                (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
                 == vectorization_factor);
#endif
          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "transform statement.");

          is_store = vect_transform_stmt (stmt, &si);
          if (is_store)
            {
              /* free the attached stmt_vec_info and remove the stmt.
                 Removing the stmt advances SI, so do not bsi_next here.  */
              stmt_ann_t ann = stmt_ann (stmt);
              free (stmt_info);
              set_stmt_info (ann, NULL);
              bsi_remove (&si);
              continue;
            }

          bsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  /* Make the (vectorized) loop iterate RATIO (= n/VF) times.  */
  slpeel_make_loop_iterate_ntimes (loop, ratio);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}
3638 /* Function vect_is_simple_use.
3640 Input:
3641 LOOP - the loop that is being vectorized.
3642 OPERAND - operand of a stmt in LOOP.
3643 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3645 Returns whether a stmt with OPERAND can be vectorized.
3646 Supportable operands are constants, loop invariants, and operands that are
3647 defined by the current iteration of the loop. Unsupportable operands are
3648 those that are defined by a previous iteration of the loop (as is the case
3649 in reduction/induction computations). */
3651 static bool
3652 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
3654 tree def_stmt;
3655 basic_block bb;
3656 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3658 if (def)
3659 *def = NULL_TREE;
3661 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3662 return true;
3664 if (TREE_CODE (operand) != SSA_NAME)
3665 return false;
3667 def_stmt = SSA_NAME_DEF_STMT (operand);
3668 if (def_stmt == NULL_TREE )
3670 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3671 fprintf (vect_dump, "no def_stmt.");
3672 return false;
3675 /* empty stmt is expected only in case of a function argument.
3676 (Otherwise - we expect a phi_node or a modify_expr). */
3677 if (IS_EMPTY_STMT (def_stmt))
3679 tree arg = TREE_OPERAND (def_stmt, 0);
3680 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3681 return true;
3682 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3684 fprintf (vect_dump, "Unexpected empty stmt: ");
3685 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
3687 return false;
3690 /* phi_node inside the loop indicates an induction/reduction pattern.
3691 This is not supported yet. */
3692 bb = bb_for_stmt (def_stmt);
3693 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3695 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3696 fprintf (vect_dump, "reduction/induction - unsupported.");
3697 return false; /* FORNOW: not supported yet. */
3700 /* Expecting a modify_expr or a phi_node. */
3701 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3702 || TREE_CODE (def_stmt) == PHI_NODE)
3704 if (def)
3705 *def = def_stmt;
3706 return true;
3709 return false;
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.
   On success, records the vectype of each relevant stmt
   (STMT_VINFO_VECTYPE) and the loop's vectorization factor
   (LOOP_VINFO_VECT_FACTOR), and verifies that an epilog loop can be
   created when one will be needed.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          /* A stmt that already operates on a vector type cannot be
             vectorized again.  */
          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Determine the scalar type whose vector counterpart the stmt
             will operate on.  */
          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump,
                           "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          /* Analysis-mode calls (NULL bsi/vec_stmt): each checks whether
             it could vectorize this stmt kind.  */
          ok = (vectorizable_operation (stmt, NULL, NULL)
                || vectorizable_assignment (stmt, NULL, NULL)
                || vectorizable_load (stmt, NULL, NULL)
                || vectorizable_store (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: stmt not supported: ");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            /* First relevant stmt determines the vectorization factor.  */
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                        * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  /* Fewer iterations than the vectorization factor: nothing to gain.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  /* An epilog loop will be needed (unknown niters, or niters not a
     multiple of VF): verify it can actually be created.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                    LOOP_LOC (loop_vinfo)))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
3904 /* Function exist_non_indexing_operands_for_use_p
3906 USE is one of the uses attached to STMT. Check if USE is
3907 used in STMT for anything other than indexing an array. */
3909 static bool
3910 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3912 tree operand;
3913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3915 /* USE corresponds to some operand in STMT. If there is no data
3916 reference in STMT, then any operand that corresponds to USE
3917 is not indexing an array. */
3918 if (!STMT_VINFO_DATA_REF (stmt_info))
3919 return true;
3921 /* STMT has a data_ref. FORNOW this means that its of one of
3922 the following forms:
3923 -1- ARRAY_REF = var
3924 -2- var = ARRAY_REF
3925 (This should have been verified in analyze_data_refs).
3927 'var' in the second case corresponds to a def, not a use,
3928 so USE cannot correspond to any operands that are not used
3929 for array indexing.
3931 Therefore, all we need to check is if STMT falls into the
3932 first case, and whether var corresponds to USE. */
3934 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3935 return false;
3937 operand = TREE_OPERAND (stmt, 1);
3939 if (TREE_CODE (operand) != SSA_NAME)
3940 return false;
3942 if (operand == use)
3943 return true;
3945 return false;
3949 /* Function vect_is_simple_iv_evolution.
3951 FORNOW: A simple evolution of an induction variables in the loop is
3952 considered a polynomial evolution with constant step. */
3954 static bool
3955 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3956 tree * step, bool strict)
3958 tree init_expr;
3959 tree step_expr;
3961 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3963 /* When there is no evolution in this loop, the evolution function
3964 is not "simple". */
3965 if (evolution_part == NULL_TREE)
3966 return false;
3968 /* When the evolution is a polynomial of degree >= 2
3969 the evolution function is not "simple". */
3970 if (tree_is_chrec (evolution_part))
3971 return false;
3973 step_expr = evolution_part;
3974 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3975 loop_nb));
3977 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3979 fprintf (vect_dump, "step: ");
3980 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
3981 fprintf (vect_dump, ", init: ");
3982 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
3985 *init = init_expr;
3986 *step = step_expr;
3988 if (TREE_CODE (step_expr) != INTEGER_CST)
3990 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3991 fprintf (vect_dump, "step unknown.");
3992 return false;
3995 if (strict)
3996 if (!integer_onep (step_expr))
3998 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3999 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
4000 return false;
4003 return true;
4007 /* Function vect_analyze_scalar_cycles.
4009 Examine the cross iteration def-use cycles of scalar variables, by
4010 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
4011 cycles that they represent do not impede vectorization.
4013 FORNOW: Reduction as in the following loop, is not supported yet:
4014 loop1:
4015 for (i=0; i<N; i++)
4016 sum += a[i];
4017 The cross-iteration cycle corresponding to variable 'sum' will be
4018 considered too complicated and will impede vectorization.
4020 FORNOW: Induction as in the following loop, is not supported yet:
4021 loop2:
4022 for (i=0; i<N; i++)
4023 a[i] = i;
4025 However, the following loop *is* vectorizable:
4026 loop3:
4027 for (i=0; i<N; i++)
4028 a[i] = b[i];
4030 In both loops there exists a def-use cycle for the variable i:
4031 loop: i_2 = PHI (i_0, i_1)
4032 a[i_2] = ...;
4033 i_1 = i_2 + 1;
4034 GOTO loop;
4036 The evolution of the above cycle is considered simple enough,
4037 however, we also check that the cycle does not need to be
4038 vectorized, i.e - we check that the variable that this cycle
4039 defines is only used for array indexing or in stmts that do not
4040 need to be vectorized. This is not the case in loop2, but it
4041 *is* the case in loop3. */
4043 static bool
4044 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
4046 tree phi;
4047 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4048 basic_block bb = loop->header;
4049 tree dummy;
4051 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4052 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
4054 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4056 tree access_fn = NULL;
4058 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4060 fprintf (vect_dump, "Analyze phi: ");
4061 print_generic_expr (vect_dump, phi, TDF_SLIM);
4064 /* Skip virtual phi's. The data dependences that are associated with
4065 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
4067 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4069 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4070 fprintf (vect_dump, "virtual phi. skip.");
4071 continue;
4074 /* Analyze the evolution function. */
4076 /* FORNOW: The only scalar cross-iteration cycles that we allow are
4077 those of loop induction variables; This property is verified here.
4079 Furthermore, if that induction variable is used in an operation
4080 that needs to be vectorized (i.e, is not solely used to index
4081 arrays and check the exit condition) - we do not support its
4082 vectorization yet. This property is verified in vect_is_simple_use,
4083 during vect_analyze_operations. */
4085 access_fn = /* instantiate_parameters
4086 (loop,*/
4087 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4089 if (!access_fn)
4091 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4092 LOOP_LOC (loop_vinfo)))
4093 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
4094 return false;
4097 if (vect_print_dump_info (REPORT_DETAILS,
4098 LOOP_LOC (loop_vinfo)))
4100 fprintf (vect_dump, "Access function of PHI: ");
4101 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4104 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4105 &dummy, false))
4107 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4108 LOOP_LOC (loop_vinfo)))
4109 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
4110 return false;
4114 return true;
4118 /* Function vect_analyze_data_ref_dependence.
4120 Return TRUE if there (might) exist a dependence between a memory-reference
4121 DRA and a memory-reference DRB. */
4123 static bool
4124 vect_analyze_data_ref_dependence (struct data_reference *dra,
4125 struct data_reference *drb,
4126 loop_vec_info loop_vinfo)
4128 bool differ_p;
4129 struct data_dependence_relation *ddr;
4131 if (!array_base_name_differ_p (dra, drb, &differ_p))
4133 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4134 LOOP_LOC (loop_vinfo)))
4136 fprintf (vect_dump,
4137 "not vectorized: can't determine dependence between: ");
4138 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
4139 fprintf (vect_dump, " and ");
4140 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
4142 return true;
4145 if (differ_p)
4146 return false;
4148 ddr = initialize_data_dependence_relation (dra, drb);
4149 compute_affine_dependence (ddr);
4151 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4152 return false;
4154 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4155 LOOP_LOC (loop_vinfo)))
4157 fprintf (vect_dump,
4158 "not vectorized: possible dependence between data-refs ");
4159 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
4160 fprintf (vect_dump, " and ");
4161 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
4164 return true;
4168 /* Function vect_analyze_data_ref_dependences.
4170 Examine all the data references in the loop, and make sure there do not
4171 exist any data dependences between them.
4173 TODO: dependences which distance is greater than the vectorization factor
4174 can be ignored. */
4176 static bool
4177 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4179 unsigned int i, j;
4180 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4181 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4183 /* Examine store-store (output) dependences. */
4185 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4186 fprintf (vect_dump, "=== vect_analyze_dependences ===");
4188 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4189 fprintf (vect_dump, "compare all store-store pairs.");
4191 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4193 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4195 struct data_reference *dra =
4196 VARRAY_GENERIC_PTR (loop_write_refs, i);
4197 struct data_reference *drb =
4198 VARRAY_GENERIC_PTR (loop_write_refs, j);
4199 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4200 return false;
4204 /* Examine load-store (true/anti) dependences. */
4206 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4207 fprintf (vect_dump, "compare all load-store pairs.");
4209 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4211 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4213 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4214 struct data_reference *drb =
4215 VARRAY_GENERIC_PTR (loop_write_refs, j);
4216 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4217 return false;
4221 return true;
4225 /* Function vect_compute_data_ref_alignment
4227 Compute the misalignment of the data reference DR.
4229 Output:
4230 1. If during the misalignment computation it is found that the data reference
4231 cannot be vectorized then false is returned.
4232 2. DR_MISALIGNMENT (DR) is defined.
4234 FOR NOW: No analysis is actually performed. Misalignment is calculated
4235 only for trivial cases. TODO. */
4237 static bool
4238 vect_compute_data_ref_alignment (struct data_reference *dr)
4240 tree stmt = DR_STMT (dr);
4241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4242 tree ref = DR_REF (dr);
4243 tree vectype;
4244 tree base, alignment;
4245 bool base_aligned_p;
4246 tree misalign;
4248 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4249 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
4251 /* Initialize misalignment to unknown. */
4252 DR_MISALIGNMENT (dr) = -1;
4254 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4255 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4256 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4257 vectype = STMT_VINFO_VECTYPE (stmt_info);
4259 if (!misalign)
4261 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4263 fprintf (vect_dump, "Unknown alignment for access: ");
4264 print_generic_expr (vect_dump, base, TDF_SLIM);
4266 return true;
4269 if (!base_aligned_p)
4271 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4273 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4275 fprintf (vect_dump, "can't force alignment of ref: ");
4276 print_generic_expr (vect_dump, ref, TDF_SLIM);
4278 return true;
4281 /* Force the alignment of the decl.
4282 NOTE: This is the only change to the code we make during
4283 the analysis phase, before deciding to vectorize the loop. */
4284 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4285 fprintf (vect_dump, "force alignment");
4286 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4287 DECL_USER_ALIGN (base) = 1;
4290 /* At this point we assume that the base is aligned. */
4291 gcc_assert (base_aligned_p
4292 || (TREE_CODE (base) == VAR_DECL
4293 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4295 /* Alignment required, in bytes: */
4296 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4298 /* Modulo alignment. */
4299 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4300 if (tree_int_cst_sgn (misalign) < 0)
4302 /* Negative misalignment value. */
4303 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4304 fprintf (vect_dump, "unexpected misalign value");
4305 return false;
4308 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4310 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4311 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
4313 return true;
4317 /* Function vect_compute_data_refs_alignment
4319 Compute the misalignment of data references in the loop.
4320 This pass may take place at function granularity instead of at loop
4321 granularity.
4323 FOR NOW: No analysis is actually performed. Misalignment is calculated
4324 only for trivial cases. TODO. */
4326 static bool
4327 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4329 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4330 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4331 unsigned int i;
4333 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4335 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4336 if (!vect_compute_data_ref_alignment (dr))
4337 return false;
4340 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4342 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4343 if (!vect_compute_data_ref_alignment (dr))
4344 return false;
4347 return true;
4351 /* Function vect_enhance_data_refs_alignment
4353 This pass will use loop versioning and loop peeling in order to enhance
4354 the alignment of data references in the loop.
4356 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4357 original loop is to be vectorized; Any other loops that are created by
4358 the transformations performed in this pass - are not supposed to be
4359 vectorized. This restriction will be relaxed. */
4361 static void
4362 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4364 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4365 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4366 unsigned int i;
4369 This pass will require a cost model to guide it whether to apply peeling
4370 or versioning or a combination of the two. For example, the scheme that
4371 intel uses when given a loop with several memory accesses, is as follows:
4372 choose one memory access ('p') which alignment you want to force by doing
4373 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4374 other accesses are not necessarily aligned, or (2) use loop versioning to
4375 generate one loop in which all accesses are aligned, and another loop in
4376 which only 'p' is necessarily aligned.
4378 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4379 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4380 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4382 Devising a cost model is the most critical aspect of this work. It will
4383 guide us on which access to peel for, whether to use loop versioning, how
4384 many versions to create, etc. The cost model will probably consist of
4385 generic considerations as well as target specific considerations (on
4386 powerpc for example, misaligned stores are more painful than misaligned
4387 loads).
4389 Here is the general steps involved in alignment enhancements:
4391 -- original loop, before alignment analysis:
4392 for (i=0; i<N; i++){
4393 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4394 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4397 -- After vect_compute_data_refs_alignment:
4398 for (i=0; i<N; i++){
4399 x = q[i]; # DR_MISALIGNMENT(q) = 3
4400 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4403 -- Possibility 1: we do loop versioning:
4404 if (p is aligned) {
4405 for (i=0; i<N; i++){ # loop 1A
4406 x = q[i]; # DR_MISALIGNMENT(q) = 3
4407 p[i] = y; # DR_MISALIGNMENT(p) = 0
4410 else {
4411 for (i=0; i<N; i++){ # loop 1B
4412 x = q[i]; # DR_MISALIGNMENT(q) = 3
4413 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4417 -- Possibility 2: we do loop peeling:
4418 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4419 x = q[i];
4420 p[i] = y;
4422 for (i = 3; i < N; i++){ # loop 2A
4423 x = q[i]; # DR_MISALIGNMENT(q) = 0
4424 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4427 -- Possibility 3: combination of loop peeling and versioning:
4428 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4429 x = q[i];
4430 p[i] = y;
4432 if (p is aligned) {
4433 for (i = 3; i<N; i++){ # loop 3A
4434 x = q[i]; # DR_MISALIGNMENT(q) = 0
4435 p[i] = y; # DR_MISALIGNMENT(p) = 0
4438 else {
4439 for (i = 3; i<N; i++){ # loop 3B
4440 x = q[i]; # DR_MISALIGNMENT(q) = 0
4441 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4445 These loops are later passed to loop_transform to be vectorized. The
4446 vectorizer will use the alignment information to guide the transformation
4447 (whether to generate regular loads/stores, or with special handling for
4448 misalignment).
4451 /* (1) Peeling to force alignment. */
4453 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4454 Considerations:
4455 + How many accesses will become aligned due to the peeling
4456 - How many accesses will become unaligned due to the peeling,
4457 and the cost of misaligned accesses.
4458 - The cost of peeling (the extra runtime checks, the increase
4459 in code size).
4461 The scheme we use FORNOW: peel to force the alignment of the first
4462 misaligned store in the loop.
4463 Rationale: misaligned stores are not yet supported.
4465 TODO: Use a better cost model. */
4467 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4469 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4470 if (!aligned_access_p (dr))
4472 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4473 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4474 break;
4478 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4480 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
4481 fprintf (vect_dump, "Peeling for alignment will not be applied.");
4482 return;
4484 else
4485 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
4486 fprintf (vect_dump, "Peeling for alignment will be applied.");
4489 /* (1.2) Update the alignment info according to the peeling factor.
4490 If the misalignment of the DR we peel for is M, then the
4491 peeling factor is VF - M, and the misalignment of each access DR_i
4492 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4493 If the misalignment of the DR we peel for is unknown, then the
4494 misalignment of each access DR_i in the loop is also unknown.
4496 FORNOW: set the misalignment of the accesses to unknown even
4497 if the peeling factor is known at compile time.
4499 TODO: - if the peeling factor is known at compile time, use that
4500 when updating the misalignment info of the loop DRs.
4501 - consider accesses that are known to have the same
4502 alignment, even if that alignment is unknown. */
4504 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4506 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4507 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4509 DR_MISALIGNMENT (dr) = 0;
4510 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
4511 fprintf (vect_dump, "Alignment of access forced using peeling.");
4513 else
4514 DR_MISALIGNMENT (dr) = -1;
4516 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4518 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4519 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4521 DR_MISALIGNMENT (dr) = 0;
4522 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
4523 fprintf (vect_dump, "Alignment of access forced using peeling.");
4525 else
4526 DR_MISALIGNMENT (dr) = -1;
4531 /* Function vect_analyze_data_refs_alignment
4533 Analyze the alignment of the data-references in the loop.
4534 FOR NOW: Until support for misliagned accesses is in place, only if all
4535 accesses are aligned can the loop be vectorized. This restriction will be
4536 relaxed. */
4538 static bool
4539 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4541 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4542 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4543 enum dr_alignment_support supportable_dr_alignment;
4544 unsigned int i;
4546 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4547 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
4550 /* This pass may take place at function granularity instead of at loop
4551 granularity. */
4553 if (!vect_compute_data_refs_alignment (loop_vinfo))
4555 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4556 LOOP_LOC (loop_vinfo)))
4557 fprintf (vect_dump,
4558 "not vectorized: can't calculate alignment for data ref.");
4559 return false;
4563 /* This pass will decide on using loop versioning and/or loop peeling in
4564 order to enhance the alignment of data references in the loop. */
4566 vect_enhance_data_refs_alignment (loop_vinfo);
4569 /* Finally, check that all the data references in the loop can be
4570 handled with respect to their alignment. */
4572 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4574 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4575 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4576 if (!supportable_dr_alignment)
4578 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4579 LOOP_LOC (loop_vinfo)))
4580 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
4581 return false;
4583 if (supportable_dr_alignment != dr_aligned
4584 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
4585 fprintf (vect_dump, "Vectorizing an unaligned access.");
4587 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4589 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4590 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4591 if (!supportable_dr_alignment)
4593 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4594 LOOP_LOC (loop_vinfo)))
4595 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
4596 return false;
4598 if (supportable_dr_alignment != dr_aligned
4599 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
4600 fprintf (vect_dump, "Vectorizing an unaligned access.");
4603 return true;
4607 /* Function vect_analyze_data_ref_access.
4609 Analyze the access pattern of the data-reference DR. For now, a data access
4610 has to consecutive to be considered vectorizable. */
4612 static bool
4613 vect_analyze_data_ref_access (struct data_reference *dr)
4615 tree stmt = DR_STMT (dr);
4616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4617 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4618 tree scalar_type = TREE_TYPE (DR_REF (dr));
4620 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4622 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4623 fprintf (vect_dump, "not consecutive access");
4624 return false;
4626 return true;
4630 /* Function vect_analyze_data_ref_accesses.
4632 Analyze the access pattern of all the data references in the loop.
4634 FORNOW: the only access pattern that is considered vectorizable is a
4635 simple step 1 (consecutive) access.
4637 FORNOW: handle only arrays and pointer accesses. */
4639 static bool
4640 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4642 unsigned int i;
4643 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4644 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4646 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4647 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
4649 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4651 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4652 bool ok = vect_analyze_data_ref_access (dr);
4653 if (!ok)
4655 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4656 LOOP_LOC (loop_vinfo)))
4657 fprintf (vect_dump, "not vectorized: complicated access pattern.");
4658 return false;
4662 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4664 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4665 bool ok = vect_analyze_data_ref_access (dr);
4666 if (!ok)
4668 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4669 LOOP_LOC (loop_vinfo)))
4670 fprintf (vect_dump, "not vectorized: complicated access pattern.");
4671 return false;
4675 return true;
4679 /* Function vect_analyze_pointer_ref_access.
4681 Input:
4682 STMT - a stmt that contains a data-ref
4683 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4685 If the data-ref access is vectorizable, return a data_reference structure
4686 that represents it (DR). Otherwise - return NULL. */
4688 static struct data_reference *
4689 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4691 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4692 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4693 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4694 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4695 tree init, step;
4696 tree reftype, innertype;
4697 tree indx_access_fn;
4698 int loopnum = loop->num;
4699 struct data_reference *dr;
4701 if (!access_fn)
4703 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4704 LOOP_LOC (loop_vinfo)))
4705 fprintf (vect_dump, "not vectorized: complicated pointer access.");
4706 return NULL;
4709 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4711 fprintf (vect_dump, "Access function of ptr: ");
4712 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4715 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4717 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4718 LOOP_LOC (loop_vinfo)))
4719 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
4720 return NULL;
4723 STRIP_NOPS (init);
4725 if (!expr_invariant_in_loop_p (loop, init))
4727 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4728 LOOP_LOC (loop_vinfo)))
4729 fprintf (vect_dump,
4730 "not vectorized: initial condition is not loop invariant.");
4731 return NULL;
4734 if (TREE_CODE (step) != INTEGER_CST)
4736 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4737 LOOP_LOC (loop_vinfo)))
4738 fprintf (vect_dump,
4739 "not vectorized: non constant step for pointer access.");
4740 return NULL;
4743 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4744 if (TREE_CODE (reftype) != POINTER_TYPE)
4746 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4747 LOOP_LOC (loop_vinfo)))
4748 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
4749 return NULL;
4752 reftype = TREE_TYPE (init);
4753 if (TREE_CODE (reftype) != POINTER_TYPE)
4755 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4756 LOOP_LOC (loop_vinfo)))
4757 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
4758 return NULL;
4761 innertype = TREE_TYPE (reftype);
4762 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4764 /* FORNOW: support only consecutive access */
4765 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4766 LOOP_LOC (loop_vinfo)))
4767 fprintf (vect_dump, "not vectorized: non consecutive access.");
4768 return NULL;
4771 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (ssizetype, step);
4772 if (TREE_CODE (init) == PLUS_EXPR
4773 || TREE_CODE (init) == MINUS_EXPR)
4774 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4775 size_binop (TREE_CODE (init), ssize_int (0),
4776 fold_convert (ssizetype, TREE_OPERAND (init, 1)));
4777 else
4778 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = ssize_int (0);
4780 indx_access_fn =
4781 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4782 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4784 fprintf (vect_dump, "Access function of ptr indx: ");
4785 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
4787 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4788 return dr;
4792 /* Function vect_get_memtag_and_dr.
4794 The function returns the relevant variable for memory tag (for aliasing
4795 purposes). Also data reference structure DR is created.
4797 This function handles three kinds of MEMREF:
4799 It is called from vect_analyze_data_refs with a MEMREF that is either an
4800 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4801 It builds a DR for them using vect_get_base_and_offset, and calls itself
4802 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4803 MEMREF along the way. During the recursive calls, the function may be called
4804 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4805 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4806 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4807 and SSA_NAME (this is category 3 - "recursion stop condition").
4809 When the MEMREF falls into category 1 there is still no data reference struct
4810 (DR) available. It is created by this function, and then, along the
4811 recursion, MEMREF will fall into category 2 or 3, in which case a DR will
4812 have already been created, but the analysis continues to retrieve the MEMTAG.
4814 Input:
4815 MEMREF - data reference in STMT
4816 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4818 Output:
4819 DR - data_reference struct for MEMREF
4820 return value - the relevant variable for memory tag (for aliasing purposes).
4824 static tree
4825 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4826 loop_vec_info loop_vinfo,
4827 tree vectype, struct data_reference **dr)
4829 tree symbl, oprnd0, oprnd1;
4830 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4831 tree offset, misalign, step;
4832 tree ref_to_be_analyzed, tag, dr_base;
4833 struct data_reference *new_dr;
4834 bool base_aligned_p;
4836 if (*dr)
4838 /* Category 3: recursion stop condition. */
4839 /* (1) A DR already exists. We only need to get the relevant memtag for
4840 MEMREF, the rest of the data was already initialized. */
4842 switch (TREE_CODE (memref))
4844 /* (1.1) Stop condition: find the relevant memtag and return. */
4845 case SSA_NAME:
4846 symbl = SSA_NAME_VAR (memref);
4847 tag = get_var_ann (symbl)->type_mem_tag;
4848 if (!tag)
4850 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4851 if (TREE_CODE (ptr) == SSA_NAME)
4852 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4854 if (!tag)
4856 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4857 UNKNOWN_LOC))
4858 fprintf (vect_dump, "not vectorized: no memtag for ref.");
4859 return NULL_TREE;
4861 return tag;
4863 case VAR_DECL:
4864 case PARM_DECL:
4865 return memref;
4867 /* Category 2: recursion continues. */
4868 /* (1.2) A recursive call to find the relevant memtag is required. */
4869 case INDIRECT_REF:
4870 symbl = TREE_OPERAND (memref, 0);
4871 break; /* For recursive call. */
4873 case COMPONENT_REF:
4874 /* Could have recorded more accurate information -
4875 i.e, the actual FIELD_DECL that is being referenced -
4876 but later passes expect VAR_DECL as the nmt. */
4877 /* Fall through. */
4879 case ADDR_EXPR:
4880 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4881 break; /* For recursive call. */
4883 case PLUS_EXPR:
4884 case MINUS_EXPR:
4885 /* Although DR exists, we have to call the function recursively to
4886 build MEMTAG for such expression. This is handled below. */
4887 oprnd0 = TREE_OPERAND (memref, 0);
4888 oprnd1 = TREE_OPERAND (memref, 1);
4890 STRIP_NOPS (oprnd1);
4891 /* Supported plus/minus expressions are of the form
4892 {address_base + offset}, such that address_base is of type
4893 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4894 or it's not of type POINTER/ARRAY.
4895 TODO: swap operands if {offset + address_base}. */
4896 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4897 && TREE_CODE (oprnd1) != INTEGER_CST)
4898 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4899 return NULL_TREE;
4901 symbl = oprnd0;
4902 break; /* For recursive call. */
4904 default:
4905 return NULL_TREE;
4908 else
4910 /* Category 1: recursion begins. */
4911 /* (2) A DR does not exist yet and must be built, followed by a
4912 recursive call to get the relevant memtag for MEMREF. */
4914 switch (TREE_CODE (memref))
4916 case INDIRECT_REF:
4917 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4918 if (!new_dr)
4919 return NULL_TREE;
4920 *dr = new_dr;
4921 symbl = DR_BASE_NAME (new_dr);
4922 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4923 break;
4925 case ARRAY_REF:
4926 new_dr = analyze_array (stmt, memref, is_read);
4927 *dr = new_dr;
4928 symbl = DR_BASE_NAME (new_dr);
4929 ref_to_be_analyzed = memref;
4930 break;
4932 default:
4933 /* TODO: Support data-refs of form a[i].p for unions and single
4934 field structures. */
4935 return NULL_TREE;
4938 offset = ssize_int (0);
4939 misalign = ssize_int (0);
4940 step = ssize_int (0);
4942 /* Analyze data-ref, find its base, initial offset from the base, step,
4943 and alignment. */
4944 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4945 vectype, loop_vinfo, &offset,
4946 &misalign, &step, &base_aligned_p);
4947 if (!dr_base)
4948 return NULL_TREE;
4950 /* Initialize information according to above analysis. */
4951 /* Since offset and step of a pointer can be also set in
4952 vect_analyze_pointer_ref_access, we combine the values here. */
4953 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4954 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4955 size_binop (PLUS_EXPR, offset,
4956 STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
4957 else
4958 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4960 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4961 STMT_VINFO_VECT_STEP (stmt_info) =
4962 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4963 else
4964 STMT_VINFO_VECT_STEP (stmt_info) = step;
4966 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4967 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4968 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4971 if (!symbl)
4972 return NULL_TREE;
4973 /* Recursive call to retrieve the relevant memtag. */
4974 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4975 return tag;
4979 /* Function vect_analyze_data_refs.
4981 Find all the data references in the loop.
4983 The general structure of the analysis of data refs in the vectorizer is as
4984 follows:
4985 1- vect_analyze_data_refs(loop):
4986 Find and analyze all data-refs in the loop:
4987 foreach ref
4988 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4989 1.1- vect_get_memtag_and_dr(ref):
4990 Analyze ref, and build a DR (data_referece struct) for it;
4991 call vect_get_base_and_offset to compute base, initial_offset,
4992 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4993 ref_stmt.alignment, and ref_stmt.step accordingly.
4994 1.1.1- vect_get_base_and_offset():
4995 Calculate base, initial_offset, step and alignment.
4996 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4997 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4998 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4999 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
5001 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
5002 which base is really an array (not a pointer) and which alignment
5003 can be forced. This restriction will be relaxed. */
5005 static bool
5006 vect_analyze_data_refs (loop_vec_info loop_vinfo)
5008 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5009 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5010 int nbbs = loop->num_nodes;
5011 block_stmt_iterator si;
5012 int j;
5013 struct data_reference *dr;
5015 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5016 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
5018 for (j = 0; j < nbbs; j++)
5020 basic_block bb = bbs[j];
5021 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5023 bool is_read = false;
5024 tree stmt = bsi_stmt (si);
5025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5026 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5027 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5028 vuse_optype vuses = STMT_VUSE_OPS (stmt);
5029 varray_type *datarefs = NULL;
5030 int nvuses, nv_may_defs, nv_must_defs;
5031 tree memref = NULL;
5032 tree symbl;
5033 tree scalar_type, vectype;
5035 /* Assumption: there exists a data-ref in stmt, if and only if
5036 it has vuses/vdefs. */
5038 if (!vuses && !v_may_defs && !v_must_defs)
5039 continue;
5041 nvuses = NUM_VUSES (vuses);
5042 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
5043 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
5045 if (nvuses && (nv_may_defs || nv_must_defs))
5047 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5049 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
5050 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5052 return false;
5055 if (TREE_CODE (stmt) != MODIFY_EXPR)
5057 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5059 fprintf (vect_dump, "unexpected vops in stmt: ");
5060 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5062 return false;
5065 if (vuses)
5067 memref = TREE_OPERAND (stmt, 1);
5068 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5069 is_read = true;
5071 else /* vdefs */
5073 memref = TREE_OPERAND (stmt, 0);
5074 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5075 is_read = false;
5078 scalar_type = TREE_TYPE (memref);
5079 vectype = get_vectype_for_scalar_type (scalar_type);
5080 if (!vectype)
5082 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5084 fprintf (vect_dump, "no vectype for stmt: ");
5085 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5086 fprintf (vect_dump, " scalar_type: ");
5087 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
5089 /* It is not possible to vectorize this data reference. */
5090 return false;
5092 /* Analyze MEMREF. If it is of a supported form, build data_reference
5093 struct for it (DR) and find memtag for aliasing purposes. */
5094 dr = NULL;
5095 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
5096 vectype, &dr);
5097 if (!symbl)
5099 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5100 LOOP_LOC (loop_vinfo)))
5102 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
5103 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5105 return false;
5107 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5108 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5109 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5110 STMT_VINFO_DATA_REF (stmt_info) = dr;
5114 return true;
5118 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5120 /* Function vect_mark_relevant.
5122 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5124 static void
5125 vect_mark_relevant (varray_type *worklist, tree stmt)
5127 stmt_vec_info stmt_info;
5129 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5130 fprintf (vect_dump, "mark relevant.");
5132 if (TREE_CODE (stmt) == PHI_NODE)
5134 VARRAY_PUSH_TREE (*worklist, stmt);
5135 return;
5138 stmt_info = vinfo_for_stmt (stmt);
5140 if (!stmt_info)
5142 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5144 fprintf (vect_dump, "mark relevant: no stmt info!!.");
5145 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5147 return;
5150 if (STMT_VINFO_RELEVANT_P (stmt_info))
5152 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5153 fprintf (vect_dump, "already marked relevant.");
5154 return;
5157 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5158 VARRAY_PUSH_TREE (*worklist, stmt);
5162 /* Function vect_stmt_relevant_p.
5164 Return true if STMT in loop that is represented by LOOP_VINFO is
5165 "relevant for vectorization".
5167 A stmt is considered "relevant for vectorization" if:
5168 - it has uses outside the loop.
5169 - it has vdefs (it alters memory).
5170 - control stmts in the loop (except for the exit condition).
5172 CHECKME: what other side effects would the vectorizer allow? */
5174 static bool
5175 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5177 v_may_def_optype v_may_defs;
5178 v_must_def_optype v_must_defs;
5179 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5180 int i;
5181 dataflow_t df;
5182 int num_uses;
5184 /* cond stmt other than loop exit cond. */
5185 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5186 return true;
5188 /* changing memory. */
5189 if (TREE_CODE (stmt) != PHI_NODE)
5191 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5192 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5193 if (v_may_defs || v_must_defs)
5195 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5196 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
5197 return true;
5201 /* uses outside the loop. */
5202 df = get_immediate_uses (stmt);
5203 num_uses = num_immediate_uses (df);
5204 for (i = 0; i < num_uses; i++)
5206 tree use = immediate_use (df, i);
5207 basic_block bb = bb_for_stmt (use);
5208 if (!flow_bb_inside_loop_p (loop, bb))
5210 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5211 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
5212 return true;
5216 return false;
5220 /* Function vect_mark_stmts_to_be_vectorized.
5222 Not all stmts in the loop need to be vectorized. For example:
5224 for i...
5225 for j...
5226 1. T0 = i + j
5227 2. T1 = a[T0]
5229 3. j = j + 1
5231 Stmt 1 and 3 do not need to be vectorized, because loop control and
5232 addressing of vectorized data-refs are handled differently.
5234 This pass detects such stmts. */
5236 static bool
5237 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5239 varray_type worklist;
5240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5241 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5242 unsigned int nbbs = loop->num_nodes;
5243 block_stmt_iterator si;
5244 tree stmt;
5245 stmt_ann_t ann;
5246 unsigned int i;
5247 int j;
5248 use_optype use_ops;
5249 stmt_vec_info stmt_info;
5250 basic_block bb;
5251 tree phi;
5253 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5254 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
5256 bb = loop->header;
5257 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5259 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5261 fprintf (vect_dump, "init: phi relevant? ");
5262 print_generic_expr (vect_dump, phi, TDF_SLIM);
5265 if (vect_stmt_relevant_p (phi, loop_vinfo))
5267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5268 LOOP_LOC (loop_vinfo)))
5269 fprintf (vect_dump, "unsupported reduction/induction.");
5270 return false;
5274 VARRAY_TREE_INIT (worklist, 64, "work list");
5276 /* 1. Init worklist. */
5278 for (i = 0; i < nbbs; i++)
5280 bb = bbs[i];
5281 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5283 stmt = bsi_stmt (si);
5285 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5287 fprintf (vect_dump, "init: stmt relevant? ");
5288 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5291 stmt_info = vinfo_for_stmt (stmt);
5292 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5294 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5295 vect_mark_relevant (&worklist, stmt);
5300 /* 2. Process_worklist */
5302 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5304 stmt = VARRAY_TOP_TREE (worklist);
5305 VARRAY_POP (worklist);
5307 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5309 fprintf (vect_dump, "worklist: examine stmt: ");
5310 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5313 /* Examine the USES in this statement. Mark all the statements which
5314 feed this statement's uses as "relevant", unless the USE is used as
5315 an array index. */
5317 if (TREE_CODE (stmt) == PHI_NODE)
5319 /* follow the def-use chain inside the loop. */
5320 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5322 tree arg = PHI_ARG_DEF (stmt, j);
5323 tree def_stmt = NULL_TREE;
5324 basic_block bb;
5325 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
5327 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5328 LOOP_LOC (loop_vinfo)))
5329 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
5330 varray_clear (worklist);
5331 return false;
5333 if (!def_stmt)
5334 continue;
5336 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5338 fprintf (vect_dump, "worklist: def_stmt: ");
5339 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
5342 bb = bb_for_stmt (def_stmt);
5343 if (flow_bb_inside_loop_p (loop, bb))
5344 vect_mark_relevant (&worklist, def_stmt);
5348 ann = stmt_ann (stmt);
5349 use_ops = USE_OPS (ann);
5351 for (i = 0; i < NUM_USES (use_ops); i++)
5353 tree use = USE_OP (use_ops, i);
5355 /* We are only interested in uses that need to be vectorized. Uses
5356 that are used for address computation are not considered relevant.
5358 if (exist_non_indexing_operands_for_use_p (use, stmt))
5360 tree def_stmt = NULL_TREE;
5361 basic_block bb;
5362 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
5364 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5365 LOOP_LOC (loop_vinfo)))
5366 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
5367 varray_clear (worklist);
5368 return false;
5371 if (!def_stmt)
5372 continue;
5374 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5376 fprintf (vect_dump, "worklist: examine use %d: ", i);
5377 print_generic_expr (vect_dump, use, TDF_SLIM);
5380 bb = bb_for_stmt (def_stmt);
5381 if (flow_bb_inside_loop_p (loop, bb))
5382 vect_mark_relevant (&worklist, def_stmt);
5385 } /* while worklist */
5387 varray_clear (worklist);
5388 return true;
5392 /* Function vect_can_advance_ivs_p
5394 In case the number of iterations that LOOP iterates in unknown at compile
5395 time, an epilog loop will be generated, and the loop induction variables
5396 (IVs) will be "advanced" to the value they are supposed to take just before
5397 the epilog loop. Here we check that the access function of the loop IVs
5398 and the expression that represents the loop bound are simple enough.
5399 These restrictions will be relaxed in the future. */
5401 static bool
5402 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
5404 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5405 basic_block bb = loop->header;
5406 tree phi;
5408 /* Analyze phi functions of the loop header. */
5410 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5412 tree access_fn = NULL;
5413 tree evolution_part;
5415 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5417 fprintf (vect_dump, "Analyze phi: ");
5418 print_generic_expr (vect_dump, phi, TDF_SLIM);
5421 /* Skip virtual phi's. The data dependences that are associated with
5422 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5424 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5426 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5427 fprintf (vect_dump, "virtual phi. skip.");
5428 continue;
5431 /* Analyze the evolution function. */
5433 access_fn = instantiate_parameters
5434 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5436 if (!access_fn)
5438 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5439 fprintf (vect_dump, "No Access function.");
5440 return false;
5443 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5445 fprintf (vect_dump, "Access function of PHI: ");
5446 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
5449 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5451 if (evolution_part == NULL_TREE)
5452 return false;
5454 /* FORNOW: We do not transform initial conditions of IVs
5455 which evolution functions are a polynomial of degree >= 2. */
5457 if (tree_is_chrec (evolution_part))
5458 return false;
5461 return true;
5465 /* Function vect_get_loop_niters.
5467 Determine how many iterations the loop is executed.
5468 If an expression that represents the number of iterations
5469 can be constructed, place it in NUMBER_OF_ITERATIONS.
5470 Return the loop exit condition. */
5472 static tree
5473 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5475 tree niters;
5477 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5478 fprintf (vect_dump, "=== get_loop_niters ===");
5480 niters = number_of_iterations_in_loop (loop);
5482 if (niters != NULL_TREE
5483 && niters != chrec_dont_know)
5485 *number_of_iterations = niters;
5487 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5489 fprintf (vect_dump, "==> get_loop_niters:" );
5490 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
5494 return get_loop_exit_condition (loop);
5498 /* Function vect_analyze_loop_form.
5500 Verify the following restrictions (some may be relaxed in the future):
5501 - it's an inner-most loop
5502 - number of BBs = 2 (which are the loop header and the latch)
5503 - the loop has a pre-header
5504 - the loop has a single entry and exit
5505 - the loop exit condition is simple enough, and the number of iterations
5506 can be analyzed (a countable loop). */
5508 static loop_vec_info
5509 vect_analyze_loop_form (struct loop *loop)
5511 loop_vec_info loop_vinfo;
5512 tree loop_cond;
5513 tree number_of_iterations = NULL;
5514 bool rescan = false;
5515 LOC loop_loc;
5517 loop_loc = find_loop_location (loop);
5519 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
5520 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
5522 if (loop->inner)
5524 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
5525 fprintf (vect_dump, "not vectorized: nested loop.");
5526 return NULL;
5529 if (!loop->single_exit
5530 || loop->num_nodes != 2
5531 || EDGE_COUNT (loop->header->preds) != 2
5532 || loop->num_entries != 1)
5534 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5536 if (!loop->single_exit)
5537 fprintf (vect_dump, "not vectorized: multiple exits.");
5538 else if (loop->num_nodes != 2)
5539 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
5540 else if (EDGE_COUNT (loop->header->preds) != 2)
5541 fprintf (vect_dump, "not vectorized: too many incoming edges.");
5542 else if (loop->num_entries != 1)
5543 fprintf (vect_dump, "not vectorized: too many entries.");
5546 return NULL;
5549 /* We assume that the loop exit condition is at the end of the loop. i.e,
5550 that the loop is represented as a do-while (with a proper if-guard
5551 before the loop if needed), where the loop header contains all the
5552 executable statements, and the latch is empty. */
5553 if (!empty_block_p (loop->latch))
5555 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5556 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
5557 return NULL;
5560 /* Make sure we have a preheader basic block. */
5561 if (!loop->pre_header)
5563 rescan = true;
5564 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5567 /* Make sure there exists a single-predecessor exit bb: */
5568 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5570 rescan = true;
5571 loop_split_edge_with (loop->exit_edges[0], NULL);
5574 if (rescan)
5576 flow_loop_scan (loop, LOOP_ALL);
5577 /* Flow loop scan does not update loop->single_exit field. */
5578 loop->single_exit = loop->exit_edges[0];
5581 if (empty_block_p (loop->header))
5583 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5584 fprintf (vect_dump, "not vectorized: empty loop.");
5585 return NULL;
5588 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5589 if (!loop_cond)
5591 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5592 fprintf (vect_dump, "not vectorized: complicated exit condition.");
5593 return NULL;
5596 if (!number_of_iterations)
5598 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5599 fprintf (vect_dump,
5600 "not vectorized: number of iterations cannot be computed.");
5601 return NULL;
5604 if (chrec_contains_undetermined (number_of_iterations))
5606 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5607 fprintf (vect_dump, "Infinite number of iterations.");
5608 return false;
5611 loop_vinfo = new_loop_vec_info (loop);
5612 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5614 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5616 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
5618 fprintf (vect_dump, "Symbolic number of iterations is ");
5619 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
5622 else
5623 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5625 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
5626 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
5627 return NULL;
5630 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5631 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
5633 return loop_vinfo;
5637 /* Function vect_analyze_loop.
5639 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5640 for it. The different analyses will record information in the
5641 loop_vec_info struct. */
5643 static loop_vec_info
5644 vect_analyze_loop (struct loop *loop)
5646 bool ok;
5647 loop_vec_info loop_vinfo;
5649 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5650 fprintf (vect_dump, "===== analyze_loop_nest =====");
5652 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5654 loop_vinfo = vect_analyze_loop_form (loop);
5655 if (!loop_vinfo)
5657 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5658 fprintf (vect_dump, "bad loop form.");
5659 return NULL;
5662 /* Find all data references in the loop (which correspond to vdefs/vuses)
5663 and analyze their evolution in the loop.
5665 FORNOW: Handle only simple, array references, which
5666 alignment can be forced, and aligned pointer-references. */
5668 ok = vect_analyze_data_refs (loop_vinfo);
5669 if (!ok)
5671 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5672 fprintf (vect_dump, "bad data references.");
5673 destroy_loop_vec_info (loop_vinfo);
5674 return NULL;
5677 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5679 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5680 if (!ok)
5682 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5683 fprintf (vect_dump, "unexpected pattern.");
5684 destroy_loop_vec_info (loop_vinfo);
5685 return NULL;
5688 /* Check that all cross-iteration scalar data-flow cycles are OK.
5689 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5691 ok = vect_analyze_scalar_cycles (loop_vinfo);
5692 if (!ok)
5694 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5695 fprintf (vect_dump, "bad scalar cycle.");
5696 destroy_loop_vec_info (loop_vinfo);
5697 return NULL;
5700 /* Analyze data dependences between the data-refs in the loop.
5701 FORNOW: fail at the first data dependence that we encounter. */
5703 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5704 if (!ok)
5706 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5707 fprintf (vect_dump, "bad data dependence.");
5708 destroy_loop_vec_info (loop_vinfo);
5709 return NULL;
5712 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5713 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5715 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5716 if (!ok)
5718 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5719 fprintf (vect_dump, "bad data access.");
5720 destroy_loop_vec_info (loop_vinfo);
5721 return NULL;
5724 /* Analyze the alignment of the data-refs in the loop.
5725 FORNOW: Only aligned accesses are handled. */
5727 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5728 if (!ok)
5730 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5731 fprintf (vect_dump, "bad data alignment.");
5732 destroy_loop_vec_info (loop_vinfo);
5733 return NULL;
5736 /* Scan all the operations in the loop and make sure they are
5737 vectorizable. */
5739 ok = vect_analyze_operations (loop_vinfo);
5740 if (!ok)
5742 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5743 fprintf (vect_dump, "bad operation or unsupported loop bound.");
5744 destroy_loop_vec_info (loop_vinfo);
5745 return NULL;
5748 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5750 return loop_vinfo;
5754 /* Function need_imm_uses_for.
5756 Return whether we ought to include information for 'var'
5757 when calculating immediate uses. For this pass we only want use
5758 information for non-virtual variables. */
5760 static bool
5761 need_imm_uses_for (tree var)
5763 return is_gimple_reg (var);
5767 /* Function vectorize_loops.
5769 Entry Point to loop vectorization phase. */
5771 void
5772 vectorize_loops (struct loops *loops)
5774 unsigned int i, loops_num;
5775 unsigned int num_vectorized_loops = 0;
5777 /* Fix the verbosity level if not defined explicitly by the user. */
5778 vect_set_dump_settings ();
5780 /* Does the target support SIMD? */
5781 /* FORNOW: until more sophisticated machine modelling is in place. */
5782 if (!UNITS_PER_SIMD_WORD)
5784 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5785 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
5786 return;
5789 #ifdef ENABLE_CHECKING
5790 verify_loop_closed_ssa ();
5791 #endif
5793 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5795 /* ----------- Analyze loops. ----------- */
5797 /* If some loop was duplicated, it gets bigger number
5798 than all previously defined loops. This fact allows us to run
5799 only over initial loops skipping newly generated ones. */
5800 loops_num = loops->num;
5801 for (i = 1; i < loops_num; i++)
5803 loop_vec_info loop_vinfo;
5804 struct loop *loop = loops->parray[i];
5806 if (!loop)
5807 continue;
5809 loop_vinfo = vect_analyze_loop (loop);
5810 loop->aux = loop_vinfo;
5812 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5813 continue;
5815 vect_transform_loop (loop_vinfo, loops);
5816 num_vectorized_loops++;
5819 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
5820 fprintf (vect_dump, "vectorized %u loops in function.\n",
5821 num_vectorized_loops);
5823 /* ----------- Finalize. ----------- */
5825 free_df ();
5826 for (i = 1; i < loops_num; i++)
5828 struct loop *loop = loops->parray[i];
5829 loop_vec_info loop_vinfo;
5831 if (!loop)
5832 continue;
5833 loop_vinfo = loop->aux;
5834 destroy_loop_vec_info (loop_vinfo);
5835 loop->aux = NULL;
5838 rewrite_into_ssa (false);
5839 rewrite_into_loop_closed_ssa (); /* FORNOW */
5840 bitmap_clear (vars_to_rename);