2004-11-24 Kelley Cook <kcook@gcc.gnu.org>
[official-gcc.git] / gcc / tree-vectorizer.c
blob8853e88da9c73f9b980ede80fe1490d01e1c6dba
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
180 #endif
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference *, loop_vec_info);
219 static bool vect_analyze_data_ref_access (struct data_reference *);
220 static bool vect_get_first_index (tree, tree *);
221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
222 static struct data_reference * vect_analyze_pointer_ref_access
223 (tree, tree, bool);
224 static bool vect_can_advance_ivs_p (struct loop *);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227 static struct data_reference * vect_analyze_pointer_ref_access
228 (tree, tree, bool);
229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232 static tree vect_get_ptr_offset (tree, tree, tree *);
233 static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
236 /* Utility functions for the code transformation. */
237 static tree vect_create_destination_var (tree, tree);
238 static tree vect_create_data_ref_ptr
239 (tree, block_stmt_iterator *, tree, tree *, bool);
240 static tree vect_create_index_for_vector_ref
241 (struct loop *, block_stmt_iterator *);
242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
243 static tree get_vectype_for_scalar_type (tree);
244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
245 static tree vect_get_vec_def_for_operand (tree, tree);
246 static tree vect_init_vector (tree, tree);
247 static void vect_finish_stmt_generation
248 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
250 /* Utility function dealing with loop peeling (not peeling itself). */
251 static void vect_generate_tmps_on_preheader
252 (loop_vec_info, tree *, tree *, tree *);
253 static tree vect_build_loop_niters (loop_vec_info);
254 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
255 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
256 static void vect_update_inits_of_dr
257 (struct data_reference *, struct loop *, tree niters);
258 static void vect_update_inits_of_drs (loop_vec_info, tree);
259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
260 static void vect_do_peeling_for_loop_bound
261 (loop_vec_info, tree *, struct loops *);
263 /* Utilities for creation and deletion of vec_info structs. */
264 loop_vec_info new_loop_vec_info (struct loop *loop);
265 void destroy_loop_vec_info (loop_vec_info);
266 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
268 static bool vect_debug_stats (struct loop *loop);
269 static bool vect_debug_details (struct loop *loop);
272 /*************************************************************************
273 Simple Loop Peeling Utilities
275 Utilities to support loop peeling for vectorization purposes.
276 *************************************************************************/
279 /* For each definition in DEFINITIONS this function allocates
280 new ssa name. */
282 static void
283 allocate_new_names (bitmap definitions)
285 unsigned ver;
286 bitmap_iterator bi;
288 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
290 tree def = ssa_name (ver);
291 tree *new_name_ptr = xmalloc (sizeof (tree));
293 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
295 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
296 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
298 SSA_NAME_AUX (def) = new_name_ptr;
303 /* Renames the use *OP_P. */
305 static void
306 rename_use_op (use_operand_p op_p)
308 tree *new_name_ptr;
310 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
311 return;
313 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
315 /* Something defined outside of the loop. */
316 if (!new_name_ptr)
317 return;
319 /* An ordinary ssa name defined in the loop. */
321 SET_USE (op_p, *new_name_ptr);
325 /* Renames the def *OP_P in statement STMT. */
327 static void
328 rename_def_op (def_operand_p op_p, tree stmt)
330 tree *new_name_ptr;
332 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
333 return;
335 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
337 /* Something defined outside of the loop. */
338 if (!new_name_ptr)
339 return;
341 /* An ordinary ssa name defined in the loop. */
343 SET_DEF (op_p, *new_name_ptr);
344 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
348 /* Renames the variables in basic block BB. */
350 static void
351 rename_variables_in_bb (basic_block bb)
353 tree phi;
354 block_stmt_iterator bsi;
355 tree stmt;
356 stmt_ann_t ann;
357 use_optype uses;
358 vuse_optype vuses;
359 def_optype defs;
360 v_may_def_optype v_may_defs;
361 v_must_def_optype v_must_defs;
362 unsigned i;
363 edge e;
364 edge_iterator ei;
365 struct loop *loop = bb->loop_father;
367 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
368 rename_def_op (PHI_RESULT_PTR (phi), phi);
370 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
372 stmt = bsi_stmt (bsi);
373 get_stmt_operands (stmt);
374 ann = stmt_ann (stmt);
376 uses = USE_OPS (ann);
377 for (i = 0; i < NUM_USES (uses); i++)
378 rename_use_op (USE_OP_PTR (uses, i));
380 defs = DEF_OPS (ann);
381 for (i = 0; i < NUM_DEFS (defs); i++)
382 rename_def_op (DEF_OP_PTR (defs, i), stmt);
384 vuses = VUSE_OPS (ann);
385 for (i = 0; i < NUM_VUSES (vuses); i++)
386 rename_use_op (VUSE_OP_PTR (vuses, i));
388 v_may_defs = V_MAY_DEF_OPS (ann);
389 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
391 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
392 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
395 v_must_defs = V_MUST_DEF_OPS (ann);
396 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
398 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
399 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
403 FOR_EACH_EDGE (e, ei, bb->succs)
405 if (!flow_bb_inside_loop_p (loop, e->dest))
406 continue;
407 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
408 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
413 /* Releases the structures holding the new ssa names. */
415 static void
416 free_new_names (bitmap definitions)
418 unsigned ver;
419 bitmap_iterator bi;
421 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
423 tree def = ssa_name (ver);
425 if (SSA_NAME_AUX (def))
427 free (SSA_NAME_AUX (def));
428 SSA_NAME_AUX (def) = NULL;
434 /* Renames variables in new generated LOOP. */
436 static void
437 rename_variables_in_loop (struct loop *loop)
439 unsigned i;
440 basic_block *bbs;
442 bbs = get_loop_body (loop);
444 for (i = 0; i < loop->num_nodes; i++)
445 rename_variables_in_bb (bbs[i]);
447 free (bbs);
451 /* Update the PHI nodes of NEW_LOOP.
453 NEW_LOOP is a duplicate of ORIG_LOOP.
454 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
455 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
456 executes before it. */
458 static void
459 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
460 struct loop *new_loop, bool after)
462 tree *new_name_ptr, new_ssa_name;
463 tree phi_new, phi_orig;
464 tree def;
465 edge orig_loop_latch = loop_latch_edge (orig_loop);
466 edge orig_entry_e = loop_preheader_edge (orig_loop);
467 edge new_loop_exit_e = new_loop->exit_edges[0];
468 edge new_loop_entry_e = loop_preheader_edge (new_loop);
469 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
472 step 1. For each loop-header-phi:
473 Add the first phi argument for the phi in NEW_LOOP
474 (the one associated with the entry of NEW_LOOP)
476 step 2. For each loop-header-phi:
477 Add the second phi argument for the phi in NEW_LOOP
478 (the one associated with the latch of NEW_LOOP)
480 step 3. Update the phis in the successor block of NEW_LOOP.
482 case 1: NEW_LOOP was placed before ORIG_LOOP:
483 The successor block of NEW_LOOP is the header of ORIG_LOOP.
484 Updating the phis in the successor block can therefore be done
485 along with the scanning of the loop header phis, because the
486 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
487 phi nodes, organized in the same order.
489 case 2: NEW_LOOP was placed after ORIG_LOOP:
490 The successor block of NEW_LOOP is the original exit block of
491 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
492 We postpone updating these phis to a later stage (when
493 loop guards are added).
497 /* Scan the phis in the headers of the old and new loops
498 (they are organized in exactly the same order). */
500 for (phi_new = phi_nodes (new_loop->header),
501 phi_orig = phi_nodes (orig_loop->header);
502 phi_new && phi_orig;
503 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
505 /* step 1. */
506 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
507 add_phi_arg (&phi_new, def, new_loop_entry_e);
509 /* step 2. */
510 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
511 if (TREE_CODE (def) != SSA_NAME)
512 continue;
514 new_name_ptr = SSA_NAME_AUX (def);
515 if (!new_name_ptr)
516 /* Something defined outside of the loop. */
517 continue;
519 /* An ordinary ssa name defined in the loop. */
520 new_ssa_name = *new_name_ptr;
521 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
523 /* step 3 (case 1). */
524 if (!after)
526 gcc_assert (new_loop_exit_e == orig_entry_e);
527 SET_PHI_ARG_DEF (phi_orig,
528 phi_arg_from_edge (phi_orig, new_loop_exit_e),
529 new_ssa_name);
535 /* Update PHI nodes for a guard of the LOOP.
537 Input:
538 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
539 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
540 originates from the guard-bb, skips LOOP and reaches the (unique) exit
541 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
542 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
543 LOOP header) before the guard code was added, and now it became a merge
544 point of two paths - the path that ends with the LOOP exit-edge, and
545 the path that ends with GUARD_EDGE.
547 This function creates and updates the relevant phi nodes to account for
548 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
549 1. Create phi nodes at NEW_MERGE_BB.
550 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
551 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
552 was added:
554 ===> The CFG before the guard-code was added:
555 LOOP_header_bb:
556 if (exit_loop) goto update_bb : LOOP_header_bb
557 update_bb:
559 ==> The CFG after the guard-code was added:
560 guard_bb:
561 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
562 LOOP_header_bb:
563 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
564 new_merge_bb:
565 goto update_bb
566 update_bb:
568 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
569 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
570 organized in the same order.
571 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
572 loop exit phis.
574 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
575 "original" loop). FALSE if LOOP is an original loop (not a newly
576 created copy). The SSA_NAME_AUX fields of the defs in the original
577 loop are the corresponding new ssa-names used in the new duplicated
578 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
579 nodes in UPDATE_BB takes the original ssa-name, and which takes the
580 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
581 the LOOP-exit-edge takes the new-name, and the phi-arg that is
582 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
583 FALSE, it's the other way around.
586 static void
587 slpeel_update_phi_nodes_for_guard (edge guard_edge,
588 struct loop *loop,
589 bool entry_phis,
590 bool is_new_loop)
592 tree orig_phi, new_phi, update_phi;
593 tree guard_arg, loop_arg;
594 basic_block new_merge_bb = guard_edge->dest;
595 edge e = EDGE_SUCC (new_merge_bb, 0);
596 basic_block update_bb = e->dest;
597 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
599 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
600 orig_phi && update_phi;
601 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
603 /* 1. Generate new phi node in NEW_MERGE_BB: */
604 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
605 new_merge_bb);
607 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
608 of LOOP. Set the two phi args in NEW_PHI for these edges: */
609 if (entry_phis)
611 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
612 EDGE_SUCC (loop->latch, 0));
613 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
615 else /* exit phis */
617 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
618 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
619 tree new_name;
621 if (new_name_ptr)
622 new_name = *new_name_ptr;
623 else
624 /* Something defined outside of the loop */
625 new_name = orig_def;
627 if (is_new_loop)
629 guard_arg = orig_def;
630 loop_arg = new_name;
632 else
634 guard_arg = new_name;
635 loop_arg = orig_def;
638 add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
639 add_phi_arg (&new_phi, guard_arg, guard_edge);
641 /* 3. Update phi in successor block. */
642 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
643 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
644 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
645 PHI_RESULT (new_phi));
648 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
652 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
653 that starts at zero, increases by one and its limit is NITERS.
655 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
657 static void
658 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
660 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
661 tree orig_cond;
662 edge exit_edge = loop->exit_edges[0];
663 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
664 tree begin_label = tree_block_label (loop->latch);
665 tree exit_label = tree_block_label (loop->single_exit->dest);
666 tree init = build_int_cst (TREE_TYPE (niters), 0);
667 tree step = build_int_cst (TREE_TYPE (niters), 1);
669 orig_cond = get_loop_exit_condition (loop);
670 gcc_assert (orig_cond);
671 create_iv (init, step, NULL_TREE, loop,
672 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
674 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
675 back to the exit condition statement. */
676 bsi_next (&loop_exit_bsi);
677 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
679 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
680 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
681 else /* 'then' edge loops back. */
682 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
684 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
685 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
686 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
687 begin_label, exit_label);
688 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
690 /* Remove old loop exit test: */
691 bsi_remove (&loop_exit_bsi);
693 if (vect_debug_stats (loop) || vect_debug_details (loop))
694 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
696 loop->nb_iterations = niters;
700 /* Given LOOP this function generates a new copy of it and puts it
701 on E which is either the entry or exit of LOOP. */
703 static struct loop *
704 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
705 edge e)
707 struct loop *new_loop;
708 basic_block *new_bbs, *bbs;
709 bool at_exit;
710 bool was_imm_dom;
711 basic_block exit_dest;
712 tree phi, phi_arg;
714 at_exit = (e == loop->exit_edges[0]);
715 if (!at_exit && e != loop_preheader_edge (loop))
717 if (dump_file && (dump_flags & TDF_DETAILS))
718 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
719 return NULL;
722 bbs = get_loop_body (loop);
724 /* Check whether duplication is possible. */
725 if (!can_copy_bbs_p (bbs, loop->num_nodes))
727 if (vect_debug_stats (loop) || vect_debug_details (loop))
728 fprintf (dump_file, "Cannot copy basic blocks.\n");
729 free (bbs);
730 return NULL;
733 /* Generate new loop structure. */
734 new_loop = duplicate_loop (loops, loop, loop->outer);
735 if (!new_loop)
737 if (vect_debug_stats (loop) || vect_debug_details (loop))
738 fprintf (dump_file, "duplicate_loop returns NULL.\n");
739 free (bbs);
740 return NULL;
743 exit_dest = loop->exit_edges[0]->dest;
744 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
745 exit_dest) == loop->header ?
746 true : false);
748 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
750 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
752 /* Duplicating phi args at exit bbs as coming
753 also from exit of duplicated loop. */
754 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
756 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
757 if (phi_arg)
759 edge new_loop_exit_edge;
761 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
762 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
763 else
764 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
766 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
770 if (at_exit) /* Add the loop copy at exit. */
772 redirect_edge_and_branch_force (e, new_loop->header);
773 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
774 if (was_imm_dom)
775 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
777 else /* Add the copy at entry. */
779 edge new_exit_e;
780 edge entry_e = loop_preheader_edge (loop);
781 basic_block preheader = entry_e->src;
783 if (!flow_bb_inside_loop_p (new_loop,
784 EDGE_SUCC (new_loop->header, 0)->dest))
785 new_exit_e = EDGE_SUCC (new_loop->header, 0);
786 else
787 new_exit_e = EDGE_SUCC (new_loop->header, 1);
789 redirect_edge_and_branch_force (new_exit_e, loop->header);
790 set_immediate_dominator (CDI_DOMINATORS, loop->header,
791 new_exit_e->src);
793 /* We have to add phi args to the loop->header here as coming
794 from new_exit_e edge. */
795 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
797 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
798 if (phi_arg)
799 add_phi_arg (&phi, phi_arg, new_exit_e);
802 redirect_edge_and_branch_force (entry_e, new_loop->header);
803 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
806 flow_loop_scan (new_loop, LOOP_ALL);
807 flow_loop_scan (loop, LOOP_ALL);
808 free (new_bbs);
809 free (bbs);
811 return new_loop;
815 /* Given the condition statement COND, put it as the last statement
816 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
817 Assumes that this is the single exit of the guarded loop.
818 Returns the skip edge. */
820 static edge
821 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
822 basic_block dom_bb)
824 block_stmt_iterator bsi;
825 edge new_e, enter_e;
826 tree cond_stmt, then_label, else_label;
828 enter_e = EDGE_SUCC (guard_bb, 0);
829 enter_e->flags &= ~EDGE_FALLTHRU;
830 enter_e->flags |= EDGE_FALSE_VALUE;
831 bsi = bsi_last (guard_bb);
833 then_label = build1 (GOTO_EXPR, void_type_node,
834 tree_block_label (exit_bb));
835 else_label = build1 (GOTO_EXPR, void_type_node,
836 tree_block_label (enter_e->dest));
837 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
838 then_label, else_label);
839 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
840 /* Add new edge to connect entry block to the second loop. */
841 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
842 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
843 return new_e;
847 /* This function verifies that the following restrictions apply to LOOP:
848 (1) it is innermost
849 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
850 (3) it is single entry, single exit
851 (4) its exit condition is the last stmt in the header
852 (5) E is the entry/exit edge of LOOP.
855 static bool
856 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
858 edge exit_e = loop->exit_edges [0];
859 edge entry_e = loop_preheader_edge (loop);
860 tree orig_cond = get_loop_exit_condition (loop);
861 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
863 if (any_marked_for_rewrite_p ())
864 return false;
866 if (loop->inner
867 /* All loops have an outer scope; the only case loop->outer is NULL is for
868 the function itself. */
869 || !loop->outer
870 || loop->num_nodes != 2
871 || !empty_block_p (loop->latch)
872 || loop->num_exits != 1
873 || loop->num_entries != 1
874 /* Verify that new loop exit condition can be trivially modified. */
875 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
876 || (e != exit_e && e != entry_e))
877 return false;
879 return true;
882 #ifdef ENABLE_CHECKING
883 static void
884 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
885 struct loop *second_loop)
887 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
888 basic_block loop2_entry_bb = second_loop->pre_header;
889 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
891 /* A guard that controls whether the second_loop is to be executed or skipped
892 is placed in first_loop->exit. first_loopt->exit therefore has two
893 successors - one is the preheader of second_loop, and the other is a bb
894 after second_loop.
896 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
899 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
900 of second_loop. */
902 /* The preheader of new_loop is expected to have two predessors:
903 first_loop->exit and the block that precedes first_loop. */
905 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
906 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
907 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
908 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
909 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
911 /* Verify that the other successor of first_loopt->exit is after the
912 second_loop. */
913 /* TODO */
915 #endif
917 /* Function slpeel_tree_peel_loop_to_edge.
919 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
920 that is placed on the entry (exit) edge E of LOOP. After this transformation
921 we have two loops one after the other - first-loop iterates FIRST_NITERS
922 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
924 Input:
925 - LOOP: the loop to be peeled.
926 - E: the exit or entry edge of LOOP.
927 If it is the entry edge, we peel the first iterations of LOOP. In this
928 case first-loop is LOOP, and second-loop is the newly created loop.
929 If it is the exit edge, we peel the last iterations of LOOP. In this
930 case, first-loop is the newly created loop, and second-loop is LOOP.
931 - NITERS: the number of iterations that LOOP iterates.
932 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
933 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
934 for updating the loop bound of the first-loop to FIRST_NITERS. If it
935 is false, the caller of this function may want to take care of this
936 (this can be useful if we don't want new stmts added to first-loop).
938 Output:
939 The function returns a pointer to the new loop-copy, or NULL if it failed
940 to perform the transformation.
942 The function generates two if-then-else guards: one before the first loop,
943 and the other before the second loop:
944 The first guard is:
945 if (FIRST_NITERS == 0) then skip the first loop,
946 and go directly to the second loop.
947 The second guard is:
948 if (FIRST_NITERS == NITERS) then skip the second loop.
950 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
951 FORNOW the resulting code will not be in loop-closed-ssa form.
954 struct loop*
955 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
956 edge e, tree first_niters,
957 tree niters, bool update_first_loop_count)
959 struct loop *new_loop = NULL, *first_loop, *second_loop;
960 edge skip_e;
961 tree pre_condition;
962 bitmap definitions;
963 basic_block bb_before_second_loop, bb_after_second_loop;
964 basic_block bb_before_first_loop;
965 basic_block bb_between_loops;
966 edge exit_e = loop->exit_edges [0];
968 if (!slpeel_can_duplicate_loop_p (loop, e))
969 return NULL;
971 /* We have to initialize cfg_hooks. Then, when calling
972 cfg_hooks->split_edge, the function tree_split_edge
973 is actually called and, when calling cfg_hooks->duplicate_block,
974 the function tree_duplicate_bb is called. */
975 tree_register_cfg_hooks ();
978 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
979 Resulting CFG would be:
981 first_loop:
982 do {
983 } while ...
985 second_loop:
986 do {
987 } while ...
989 orig_exit_bb:
992 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
994 if (vect_debug_stats (loop) || vect_debug_details (loop))
995 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
996 return NULL;
999 if (e == exit_e)
1001 /* NEW_LOOP was placed after LOOP. */
1002 first_loop = loop;
1003 second_loop = new_loop;
1005 else
1007 /* NEW_LOOP was placed before LOOP. */
1008 first_loop = new_loop;
1009 second_loop = loop;
1012 definitions = marked_ssa_names ();
1013 allocate_new_names (definitions);
1014 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1015 rename_variables_in_loop (new_loop);
1018 /* 2. Add the guard that controls whether the first loop is executed.
1019 Resulting CFG would be:
1021 bb_before_first_loop:
1022 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1023 GOTO first-loop
1025 first_loop:
1026 do {
1027 } while ...
1029 bb_before_second_loop:
1031 second_loop:
1032 do {
1033 } while ...
1035 orig_exit_bb:
1038 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1039 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1040 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1041 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1042 flow_loop_scan (first_loop, LOOP_ALL);
1043 flow_loop_scan (second_loop, LOOP_ALL);
1045 pre_condition =
1046 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1047 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1048 bb_before_second_loop, bb_before_first_loop);
1049 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1050 first_loop == new_loop);
1053 /* 3. Add the guard that controls whether the second loop is executed.
1054 Resulting CFG would be:
1056 bb_before_first_loop:
1057 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1058 GOTO first-loop
1060 first_loop:
1061 do {
1062 } while ...
1064 bb_between_loops:
1065 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1066 GOTO bb_before_second_loop
1068 bb_before_second_loop:
1070 second_loop:
1071 do {
1072 } while ...
1074 bb_after_second_loop:
1076 orig_exit_bb:
1079 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1080 add_bb_to_loop (bb_between_loops, first_loop->outer);
1081 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1082 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1083 flow_loop_scan (first_loop, LOOP_ALL);
1084 flow_loop_scan (second_loop, LOOP_ALL);
1086 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1087 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1088 bb_after_second_loop, bb_before_first_loop);
1089 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1090 second_loop == new_loop);
1092 /* Flow loop scan does not update loop->single_exit field. */
1093 first_loop->single_exit = first_loop->exit_edges[0];
1094 second_loop->single_exit = second_loop->exit_edges[0];
1096 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1098 if (update_first_loop_count)
1099 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1101 free_new_names (definitions);
1102 BITMAP_XFREE (definitions);
1103 unmark_all_for_rewrite ();
1105 return new_loop;
1109 /* Here the proper Vectorizer starts. */
1111 /*************************************************************************
1112 Vectorization Utilities.
1113 *************************************************************************/
1115 /* Function new_stmt_vec_info.
1117 Create and initialize a new stmt_vec_info struct for STMT. */
1119 stmt_vec_info
1120 new_stmt_vec_info (tree stmt, struct loop *loop)
1122 stmt_vec_info res;
1123 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1125 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1126 STMT_VINFO_STMT (res) = stmt;
1127 STMT_VINFO_LOOP (res) = loop;
1128 STMT_VINFO_RELEVANT_P (res) = 0;
1129 STMT_VINFO_VECTYPE (res) = NULL;
1130 STMT_VINFO_VEC_STMT (res) = NULL;
1131 STMT_VINFO_DATA_REF (res) = NULL;
1132 STMT_VINFO_MEMTAG (res) = NULL;
1133 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1135 return res;
1139 /* Function new_loop_vec_info.
1141 Create and initialize a new loop_vec_info struct for LOOP, as well as
1142 stmt_vec_info structs for all the stmts in LOOP. */
1144 loop_vec_info
1145 new_loop_vec_info (struct loop *loop)
1147 loop_vec_info res;
1148 basic_block *bbs;
1149 block_stmt_iterator si;
1150 unsigned int i;
1152 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1154 bbs = get_loop_body (loop);
1156 /* Create stmt_info for all stmts in the loop. */
1157 for (i = 0; i < loop->num_nodes; i++)
1159 basic_block bb = bbs[i];
1160 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1162 tree stmt = bsi_stmt (si);
1163 stmt_ann_t ann;
1165 get_stmt_operands (stmt);
1166 ann = stmt_ann (stmt);
1167 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1171 LOOP_VINFO_LOOP (res) = loop;
1172 LOOP_VINFO_BBS (res) = bbs;
1173 LOOP_VINFO_EXIT_COND (res) = NULL;
1174 LOOP_VINFO_NITERS (res) = NULL;
1175 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1176 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1177 LOOP_VINFO_VECT_FACTOR (res) = 0;
1178 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1179 "loop_write_datarefs");
1180 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1181 "loop_read_datarefs");
1182 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1184 return res;
1188 /* Function destroy_loop_vec_info.
1190 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1191 stmts in the loop. */
1193 void
1194 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1196 struct loop *loop;
1197 basic_block *bbs;
1198 int nbbs;
1199 block_stmt_iterator si;
1200 int j;
1202 if (!loop_vinfo)
1203 return;
1205 loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 bbs = LOOP_VINFO_BBS (loop_vinfo);
1208 nbbs = loop->num_nodes;
1210 for (j = 0; j < nbbs; j++)
1212 basic_block bb = bbs[j];
1213 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1215 tree stmt = bsi_stmt (si);
1216 stmt_ann_t ann = stmt_ann (stmt);
1217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218 free (stmt_info);
1219 set_stmt_info (ann, NULL);
1223 free (LOOP_VINFO_BBS (loop_vinfo));
1224 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1225 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1227 free (loop_vinfo);
1231 /* Function debug_loop_stats.
1233 For vectorization statistics dumps. */
1235 static bool
1236 vect_debug_stats (struct loop *loop)
1238 basic_block bb;
1239 block_stmt_iterator si;
1240 tree node = NULL_TREE;
1242 if (!dump_file || !(dump_flags & TDF_STATS))
1243 return false;
1245 if (!loop)
1247 fprintf (dump_file, "\n");
1248 return true;
1251 if (!loop->header)
1252 return false;
1254 bb = loop->header;
1256 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1258 node = bsi_stmt (si);
1259 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1260 break;
1263 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1264 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1266 fprintf (dump_file, "\nloop at %s:%d: ",
1267 EXPR_FILENAME (node), EXPR_LINENO (node));
1268 return true;
1271 return false;
1275 /* Function debug_loop_details.
1277 For vectorization debug dumps. */
1279 static bool
1280 vect_debug_details (struct loop *loop)
1282 basic_block bb;
1283 block_stmt_iterator si;
1284 tree node = NULL_TREE;
1286 if (!dump_file || !(dump_flags & TDF_DETAILS))
1287 return false;
1289 if (!loop)
1291 fprintf (dump_file, "\n");
1292 return true;
1295 if (!loop->header)
1296 return false;
1298 bb = loop->header;
1300 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1302 node = bsi_stmt (si);
1303 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1304 break;
1307 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1308 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1310 fprintf (dump_file, "\nloop at %s:%d: ",
1311 EXPR_FILENAME (node), EXPR_LINENO (node));
1312 return true;
1315 return false;
1319 /* Function vect_get_ptr_offset
1321 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1323 static tree
1324 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1325 tree vectype ATTRIBUTE_UNUSED,
1326 tree *offset ATTRIBUTE_UNUSED)
1328 /* TODO: Use alignment information. */
1329 return NULL_TREE;
1333 /* Function vect_get_base_and_bit_offset
1335 Return the BASE of the data reference EXPR.
1336 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1337 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1338 bits of 'a.b[i] + 4B' from a.
1340 Input:
1341 EXPR - the memory reference that is being analyzed
1342 DR - the data_reference struct of the _original_ memory reference
1343 (Note: DR_REF (DR) is not necessarily EXPR)
1344 VECTYPE - the type that defines the alignment (i.e, we compute
1345 alignment relative to TYPE_ALIGN(VECTYPE))
1347 Output:
1348 BASE (returned value) - the base of the data reference EXPR.
1349 E.g, if EXPR is a.b[k].c[i][j] the returned
1350 base is a.
1351 OFFSET - offset of EXPR from BASE in bits
1352 BASE_ALIGNED_P - indicates if BASE is aligned
1354 If something unexpected is encountered (an unsupported form of data-ref),
1355 or if VECTYPE is given but OFFSET cannot be determined:
1356 then NULL_TREE is returned. */
1358 static tree
1359 vect_get_base_and_bit_offset (struct data_reference *dr,
1360 tree expr,
1361 tree vectype,
1362 loop_vec_info loop_vinfo,
1363 tree *offset,
1364 bool *base_aligned_p)
1366 tree this_offset = size_zero_node;
1367 tree base = NULL_TREE;
1368 tree next_ref;
1369 tree oprnd0, oprnd1;
1370 struct data_reference *array_dr;
1371 enum tree_code code = TREE_CODE (expr);
1373 *base_aligned_p = false;
1375 switch (code)
1377 /* These cases end the recursion: */
1378 case VAR_DECL:
1379 *offset = size_zero_node;
1380 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1381 *base_aligned_p = true;
1382 return expr;
1384 case SSA_NAME:
1385 if (!vectype)
1386 return expr;
1388 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1389 return NULL_TREE;
1391 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1393 base = vect_get_ptr_offset (expr, vectype, offset);
1394 if (base)
1395 *base_aligned_p = true;
1397 else
1399 *base_aligned_p = true;
1400 *offset = size_zero_node;
1401 base = expr;
1403 return base;
1405 case INTEGER_CST:
1406 *offset = int_const_binop (MULT_EXPR, expr,
1407 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1408 return expr;
1410 /* These cases continue the recursion: */
1411 case COMPONENT_REF:
1412 oprnd0 = TREE_OPERAND (expr, 0);
1413 oprnd1 = TREE_OPERAND (expr, 1);
1415 this_offset = bit_position (oprnd1);
1416 if (vectype && !host_integerp (this_offset, 1))
1417 return NULL_TREE;
1418 next_ref = oprnd0;
1419 break;
1421 case ADDR_EXPR:
1422 oprnd0 = TREE_OPERAND (expr, 0);
1423 next_ref = oprnd0;
1424 break;
1426 case INDIRECT_REF:
1427 oprnd0 = TREE_OPERAND (expr, 0);
1428 next_ref = oprnd0;
1429 break;
1431 case ARRAY_REF:
1432 if (DR_REF (dr) != expr)
1433 /* Build array data_reference struct if the existing DR_REF
1434 doesn't match EXPR. This happens, for example, when the
1435 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1436 contains information on the access of T, not of arr. In order
1437 to continue the analysis, we create a new DR struct that
1438 describes the access of arr.
1440 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1441 else
1442 array_dr = dr;
1444 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1445 vectype, &this_offset);
1446 if (!next_ref)
1447 return NULL_TREE;
1449 if (vectype &&
1450 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1452 *offset = this_offset;
1453 *base_aligned_p = true;
1454 return next_ref;
1456 break;
1458 case PLUS_EXPR:
1459 case MINUS_EXPR:
1460 /* In case we have a PLUS_EXPR of the form
1461 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1462 This is verified in vect_get_symbl_and_dr. */
1463 oprnd0 = TREE_OPERAND (expr, 0);
1464 oprnd1 = TREE_OPERAND (expr, 1);
1466 base = vect_get_base_and_bit_offset
1467 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1468 if (vectype && !base)
1469 return NULL_TREE;
1471 next_ref = oprnd0;
1472 break;
1474 default:
1475 return NULL_TREE;
1478 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1479 loop_vinfo, offset, base_aligned_p);
1481 if (vectype && base)
1483 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1484 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1485 return NULL_TREE;
1487 if (vect_debug_details (NULL))
1489 print_generic_expr (dump_file, expr, TDF_SLIM);
1490 fprintf (dump_file, " --> total offset for ref: ");
1491 print_generic_expr (dump_file, *offset, TDF_SLIM);
1494 return base;
1498 /* Function vect_force_dr_alignment_p.
1500 Returns whether the alignment of a DECL can be forced to be aligned
1501 on ALIGNMENT bit boundary. */
1503 static bool
1504 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1506 if (TREE_CODE (decl) != VAR_DECL)
1507 return false;
1509 if (DECL_EXTERNAL (decl))
1510 return false;
1512 if (TREE_STATIC (decl))
1513 return (alignment <= MAX_OFILE_ALIGNMENT);
1514 else
1515 /* This is not 100% correct. The absolute correct stack alignment
1516 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1517 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1518 However, until someone implements forced stack alignment, SSE
1519 isn't really usable without this. */
1520 return (alignment <= PREFERRED_STACK_BOUNDARY);
1524 /* Function vect_get_new_vect_var.
1526 Returns a name for a new variable. The current naming scheme appends the
1527 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1528 the name of vectorizer generated variables, and appends that to NAME if
1529 provided. */
1531 static tree
1532 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1534 const char *prefix;
1535 int prefix_len;
1536 tree new_vect_var;
1538 if (var_kind == vect_simple_var)
1539 prefix = "vect_";
1540 else
1541 prefix = "vect_p";
1543 prefix_len = strlen (prefix);
1545 if (name)
1546 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1547 else
1548 new_vect_var = create_tmp_var (type, prefix);
1550 return new_vect_var;
1554 /* Function vect_create_index_for_vector_ref.
1556 Create (and return) an index variable, along with it's update chain in the
1557 loop. This variable will be used to access a memory location in a vector
1558 operation.
1560 Input:
1561 LOOP: The loop being vectorized.
1562 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1563 function can be added here, or in the loop pre-header.
1565 Output:
1566 Return an index that will be used to index a vector array. It is expected
1567 that a pointer to the first vector will be used as the base address for the
1568 indexed reference.
1570 FORNOW: we are not trying to be efficient, just creating a new index each
1571 time from scratch. At this time all vector references could use the same
1572 index.
1574 TODO: create only one index to be used by all vector references. Record
1575 the index in the LOOP_VINFO the first time this procedure is called and
1576 return it on subsequent calls. The increment of this index must be placed
1577 just before the conditional expression that ends the single block loop. */
1579 static tree
1580 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1582 tree init, step;
1583 tree indx_before_incr, indx_after_incr;
1585 /* It is assumed that the base pointer used for vectorized access contains
1586 the address of the first vector. Therefore the index used for vectorized
1587 access must be initialized to zero and incremented by 1. */
1589 init = integer_zero_node;
1590 step = integer_one_node;
1592 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1593 create_iv (init, step, NULL_TREE, loop, bsi, false,
1594 &indx_before_incr, &indx_after_incr);
1596 return indx_before_incr;
1600 /* Function vect_create_addr_base_for_vector_ref.
1602 Create an expression that computes the address of the first memory location
1603 that will be accessed for a data reference.
1605 Input:
1606 STMT: The statement containing the data reference.
1607 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1608 OFFSET: Optional. If supplied, it is be added to the initial address.
1610 Output:
1611 1. Return an SSA_NAME whose value is the address of the memory location of
1612 the first vector of the data reference.
1613 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1614 these statement(s) which define the returned SSA_NAME.
1616 FORNOW: We are only handling array accesses with step 1. */
1618 static tree
1619 vect_create_addr_base_for_vector_ref (tree stmt,
1620 tree *new_stmt_list,
1621 tree offset)
1623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1624 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1625 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1626 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1627 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1628 tree ref = DR_REF (dr);
1629 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1630 tree scalar_type = TREE_TYPE (ref);
1631 tree scalar_ptr_type = build_pointer_type (scalar_type);
1632 tree access_fn;
1633 tree init_val, step, init_oval;
1634 bool ok;
1635 bool is_ptr_ref, is_array_ref, is_addr_expr;
1636 tree array_base;
1637 tree vec_stmt;
1638 tree new_temp;
1639 tree array_ref;
1640 tree addr_base, addr_expr;
1641 tree dest, new_stmt;
1643 /* Only the access function of the last index is relevant (i_n in
1644 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1645 access_fn = DR_ACCESS_FN (dr, 0);
1646 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1647 true);
1648 if (!ok)
1649 init_oval = integer_zero_node;
1651 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1652 && TREE_CODE (data_ref_base) == SSA_NAME;
1653 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1654 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1655 || TREE_CODE (data_ref_base) == PLUS_EXPR
1656 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1657 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1659 /** Create: &(base[init_val])
1661 if data_ref_base is an ARRAY_TYPE:
1662 base = data_ref_base
1664 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1665 base = *((scalar_array *) data_ref_base)
1668 if (is_array_ref)
1669 array_base = data_ref_base;
1670 else /* is_ptr_ref or is_addr_expr */
1672 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1673 tree scalar_array_type = build_array_type (scalar_type, 0);
1674 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1675 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1676 add_referenced_tmp_var (array_ptr);
1678 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1679 add_referenced_tmp_var (dest);
1680 data_ref_base =
1681 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1682 append_to_statement_list_force (new_stmt, new_stmt_list);
1684 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1685 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1686 new_temp = make_ssa_name (array_ptr, vec_stmt);
1687 TREE_OPERAND (vec_stmt, 0) = new_temp;
1688 append_to_statement_list_force (vec_stmt, new_stmt_list);
1690 /* (*array_ptr) */
1691 array_base = build_fold_indirect_ref (new_temp);
1694 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1695 add_referenced_tmp_var (dest);
1696 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1697 append_to_statement_list_force (new_stmt, new_stmt_list);
1699 if (offset)
1701 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1702 add_referenced_tmp_var (tmp);
1703 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1704 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1705 init_val = make_ssa_name (tmp, vec_stmt);
1706 TREE_OPERAND (vec_stmt, 0) = init_val;
1707 append_to_statement_list_force (vec_stmt, new_stmt_list);
1710 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1711 NULL_TREE, NULL_TREE);
1712 addr_base = build_fold_addr_expr (array_ref);
1714 /* addr_expr = addr_base */
1715 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1716 get_name (base_name));
1717 add_referenced_tmp_var (addr_expr);
1718 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1719 new_temp = make_ssa_name (addr_expr, vec_stmt);
1720 TREE_OPERAND (vec_stmt, 0) = new_temp;
1721 append_to_statement_list_force (vec_stmt, new_stmt_list);
1723 return new_temp;
1727 /* Function get_vectype_for_scalar_type.
1729 Returns the vector type corresponding to SCALAR_TYPE as supported
1730 by the target. */
1732 static tree
1733 get_vectype_for_scalar_type (tree scalar_type)
1735 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1736 int nbytes = GET_MODE_SIZE (inner_mode);
1737 int nunits;
1738 tree vectype;
1740 if (nbytes == 0)
1741 return NULL_TREE;
1743 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1744 is expected. */
1745 nunits = UNITS_PER_SIMD_WORD / nbytes;
1747 vectype = build_vector_type (scalar_type, nunits);
1748 if (vect_debug_details (NULL))
1750 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1751 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1754 if (!vectype)
1755 return NULL_TREE;
1757 if (vect_debug_details (NULL))
1759 fprintf (dump_file, "vectype: ");
1760 print_generic_expr (dump_file, vectype, TDF_SLIM);
1763 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1765 /* TODO: tree-complex.c sometimes can parallelize operations
1766 on generic vectors. We can vectorize the loop in that case,
1767 but then we should re-run the lowering pass. */
1768 if (vect_debug_details (NULL))
1769 fprintf (dump_file, "mode not supported by target.");
1770 return NULL_TREE;
1773 return vectype;
1777 /* Function vect_align_data_ref.
1779 Handle mislignment of a memory accesses.
1781 FORNOW: Can't handle misaligned accesses.
1782 Make sure that the dataref is aligned. */
1784 static void
1785 vect_align_data_ref (tree stmt)
1787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1788 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1790 /* FORNOW: can't handle misaligned accesses;
1791 all accesses expected to be aligned. */
1792 gcc_assert (aligned_access_p (dr));
1796 /* Function vect_create_data_ref_ptr.
1798 Create a memory reference expression for vector access, to be used in a
1799 vector load/store stmt. The reference is based on a new pointer to vector
1800 type (vp).
1802 Input:
1803 1. STMT: a stmt that references memory. Expected to be of the form
1804 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1805 2. BSI: block_stmt_iterator where new stmts can be added.
1806 3. OFFSET (optional): an offset to be added to the initial address accessed
1807 by the data-ref in STMT.
1808 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1809 pointing to the initial address.
1811 Output:
1812 1. Declare a new ptr to vector_type, and have it point to the base of the
1813 data reference (initial addressed accessed by the data reference).
1814 For example, for vector of type V8HI, the following code is generated:
1816 v8hi *vp;
1817 vp = (v8hi *)initial_address;
1819 if OFFSET is not supplied:
1820 initial_address = &a[init];
1821 if OFFSET is supplied:
1822 initial_address = &a[init + OFFSET];
1824 Return the initial_address in INITIAL_ADDRESS.
1826 2. Create a data-reference in the loop based on the new vector pointer vp,
1827 and using a new index variable 'idx' as follows:
1829 vp' = vp + update
1831 where if ONLY_INIT is true:
1832 update = zero
1833 and otherwise
1834 update = idx + vector_type_size
1836 Return the pointer vp'.
1839 FORNOW: handle only aligned and consecutive accesses. */
1841 static tree
1842 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1843 tree *initial_address, bool only_init)
1845 tree base_name;
1846 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1847 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1848 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1849 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1850 tree vect_ptr_type;
1851 tree vect_ptr;
1852 tree tag;
1853 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1854 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1855 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1856 int nvuses, nv_may_defs, nv_must_defs;
1857 int i;
1858 tree new_temp;
1859 tree vec_stmt;
1860 tree new_stmt_list = NULL_TREE;
1861 tree idx;
1862 edge pe = loop_preheader_edge (loop);
1863 basic_block new_bb;
1864 tree vect_ptr_init;
1865 tree vectype_size;
1866 tree ptr_update;
1867 tree data_ref_ptr;
1868 tree type, tmp, size;
1870 base_name = unshare_expr (DR_BASE_NAME (dr));
1871 if (vect_debug_details (NULL))
1873 tree data_ref_base = base_name;
1874 fprintf (dump_file, "create array_ref of type: ");
1875 print_generic_expr (dump_file, vectype, TDF_SLIM);
1876 if (TREE_CODE (data_ref_base) == VAR_DECL)
1877 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1878 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1879 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1880 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1881 fprintf (dump_file, "vectorizing a record based array ref: ");
1882 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1883 fprintf (dump_file, "vectorizing a pointer ref: ");
1884 print_generic_expr (dump_file, base_name, TDF_SLIM);
1887 /** (1) Create the new vector-pointer variable: **/
1889 vect_ptr_type = build_pointer_type (vectype);
1890 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1891 get_name (base_name));
1892 add_referenced_tmp_var (vect_ptr);
1895 /** (2) Handle aliasing information of the new vector-pointer: **/
1897 tag = STMT_VINFO_MEMTAG (stmt_info);
1898 gcc_assert (tag);
1899 get_var_ann (vect_ptr)->type_mem_tag = tag;
1901 /* Mark for renaming all aliased variables
1902 (i.e, the may-aliases of the type-mem-tag). */
1903 nvuses = NUM_VUSES (vuses);
1904 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1905 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1906 for (i = 0; i < nvuses; i++)
1908 tree use = VUSE_OP (vuses, i);
1909 if (TREE_CODE (use) == SSA_NAME)
1910 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1912 for (i = 0; i < nv_may_defs; i++)
1914 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1915 if (TREE_CODE (def) == SSA_NAME)
1916 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1918 for (i = 0; i < nv_must_defs; i++)
1920 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1921 if (TREE_CODE (def) == SSA_NAME)
1922 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1926 /** (3) Calculate the initial address the vector-pointer, and set
1927 the vector-pointer to point to it before the loop: **/
1929 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1930 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1931 offset);
1932 pe = loop_preheader_edge (loop);
1933 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1934 gcc_assert (!new_bb);
1935 *initial_address = new_temp;
1937 /* Create: p = (vectype *) initial_base */
1938 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1939 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1940 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1941 TREE_OPERAND (vec_stmt, 0) = new_temp;
1942 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1943 gcc_assert (!new_bb);
1944 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1947 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1949 if (only_init) /* No update in loop is required. */
1950 return vect_ptr_init;
1952 idx = vect_create_index_for_vector_ref (loop, bsi);
1954 /* Create: update = idx * vectype_size */
1955 tmp = create_tmp_var (integer_type_node, "update");
1956 add_referenced_tmp_var (tmp);
1957 size = TYPE_SIZE (vect_ptr_type);
1958 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1959 ptr_update = create_tmp_var (type, "update");
1960 add_referenced_tmp_var (ptr_update);
1961 vectype_size = build_int_cst (integer_type_node,
1962 GET_MODE_SIZE (TYPE_MODE (vectype)));
1963 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1964 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
1965 new_temp = make_ssa_name (tmp, vec_stmt);
1966 TREE_OPERAND (vec_stmt, 0) = new_temp;
1967 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1968 vec_stmt = fold_convert (type, new_temp);
1969 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1970 new_temp = make_ssa_name (ptr_update, vec_stmt);
1971 TREE_OPERAND (vec_stmt, 0) = new_temp;
1972 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1974 /* Create: data_ref_ptr = vect_ptr_init + update */
1975 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1976 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1977 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1978 TREE_OPERAND (vec_stmt, 0) = new_temp;
1979 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1980 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1982 return data_ref_ptr;
1986 /* Function vect_create_destination_var.
1988 Create a new temporary of type VECTYPE. */
1990 static tree
1991 vect_create_destination_var (tree scalar_dest, tree vectype)
1993 tree vec_dest;
1994 const char *new_name;
1996 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1998 new_name = get_name (scalar_dest);
1999 if (!new_name)
2000 new_name = "var_";
2001 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2002 add_referenced_tmp_var (vec_dest);
2004 return vec_dest;
2008 /* Function vect_init_vector.
2010 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2011 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2012 used in the vectorization of STMT. */
2014 static tree
2015 vect_init_vector (tree stmt, tree vector_var)
2017 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2018 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2019 tree new_var;
2020 tree init_stmt;
2021 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2022 tree vec_oprnd;
2023 edge pe;
2024 tree new_temp;
2025 basic_block new_bb;
2027 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2028 add_referenced_tmp_var (new_var);
2030 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2031 new_temp = make_ssa_name (new_var, init_stmt);
2032 TREE_OPERAND (init_stmt, 0) = new_temp;
2034 pe = loop_preheader_edge (loop);
2035 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2036 gcc_assert (!new_bb);
2038 if (vect_debug_details (NULL))
2040 fprintf (dump_file, "created new init_stmt: ");
2041 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2044 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2045 return vec_oprnd;
2049 /* Function vect_get_vec_def_for_operand.
2051 OP is an operand in STMT. This function returns a (vector) def that will be
2052 used in the vectorized stmt for STMT.
2054 In the case that OP is an SSA_NAME which is defined in the loop, then
2055 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2057 In case OP is an invariant or constant, a new stmt that creates a vector def
2058 needs to be introduced. */
2060 static tree
2061 vect_get_vec_def_for_operand (tree op, tree stmt)
2063 tree vec_oprnd;
2064 tree vec_stmt;
2065 tree def_stmt;
2066 stmt_vec_info def_stmt_info = NULL;
2067 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2068 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2069 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2070 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2071 basic_block bb;
2072 tree vec_inv;
2073 tree t = NULL_TREE;
2074 tree def;
2075 int i;
2077 if (vect_debug_details (NULL))
2079 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2080 print_generic_expr (dump_file, op, TDF_SLIM);
2083 /** ===> Case 1: operand is a constant. **/
2085 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2087 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2089 tree vec_cst;
2091 /* Build a tree with vector elements. */
2092 if (vect_debug_details (NULL))
2093 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2095 for (i = nunits - 1; i >= 0; --i)
2097 t = tree_cons (NULL_TREE, op, t);
2099 vec_cst = build_vector (vectype, t);
2100 return vect_init_vector (stmt, vec_cst);
2103 gcc_assert (TREE_CODE (op) == SSA_NAME);
2105 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2107 def_stmt = SSA_NAME_DEF_STMT (op);
2108 def_stmt_info = vinfo_for_stmt (def_stmt);
2110 if (vect_debug_details (NULL))
2112 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2113 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2117 /** ==> Case 2.1: operand is defined inside the loop. **/
2119 if (def_stmt_info)
2121 /* Get the def from the vectorized stmt. */
2123 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2124 gcc_assert (vec_stmt);
2125 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2126 return vec_oprnd;
2130 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2131 it is a reduction/induction. **/
2133 bb = bb_for_stmt (def_stmt);
2134 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2136 if (vect_debug_details (NULL))
2137 fprintf (dump_file, "reduction/induction - unsupported.");
2138 internal_error ("no support for reduction/induction"); /* FORNOW */
2142 /** ==> Case 2.3: operand is defined outside the loop -
2143 it is a loop invariant. */
2145 switch (TREE_CODE (def_stmt))
2147 case PHI_NODE:
2148 def = PHI_RESULT (def_stmt);
2149 break;
2150 case MODIFY_EXPR:
2151 def = TREE_OPERAND (def_stmt, 0);
2152 break;
2153 case NOP_EXPR:
2154 def = TREE_OPERAND (def_stmt, 0);
2155 gcc_assert (IS_EMPTY_STMT (def_stmt));
2156 def = op;
2157 break;
2158 default:
2159 if (vect_debug_details (NULL))
2161 fprintf (dump_file, "unsupported defining stmt: ");
2162 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2164 internal_error ("unsupported defining stmt");
2167 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2169 if (vect_debug_details (NULL))
2170 fprintf (dump_file, "Create vector_inv.");
2172 for (i = nunits - 1; i >= 0; --i)
2174 t = tree_cons (NULL_TREE, def, t);
2177 vec_inv = build_constructor (vectype, t);
2178 return vect_init_vector (stmt, vec_inv);
2182 /* Function vect_finish_stmt_generation.
2184 Insert a new stmt. */
2186 static void
2187 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2189 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2191 if (vect_debug_details (NULL))
2193 fprintf (dump_file, "add new stmt: ");
2194 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2197 /* Make sure bsi points to the stmt that is being vectorized. */
2199 /* Assumption: any stmts created for the vectorization of stmt S were
2200 inserted before S. BSI is expected to point to S or some new stmt before S.
2203 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2204 bsi_next (bsi);
2205 gcc_assert (stmt == bsi_stmt (*bsi));
2209 /* Function vectorizable_assignment.
2211 Check if STMT performs an assignment (copy) that can be vectorized.
2212 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2213 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2214 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2216 static bool
2217 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2219 tree vec_dest;
2220 tree scalar_dest;
2221 tree op;
2222 tree vec_oprnd;
2223 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2224 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2225 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2226 tree new_temp;
2228 /* Is vectorizable assignment? */
2230 if (TREE_CODE (stmt) != MODIFY_EXPR)
2231 return false;
2233 scalar_dest = TREE_OPERAND (stmt, 0);
2234 if (TREE_CODE (scalar_dest) != SSA_NAME)
2235 return false;
2237 op = TREE_OPERAND (stmt, 1);
2238 if (!vect_is_simple_use (op, loop, NULL))
2240 if (vect_debug_details (NULL))
2241 fprintf (dump_file, "use not simple.");
2242 return false;
2245 if (!vec_stmt) /* transformation not required. */
2247 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2248 return true;
2251 /** Trasform. **/
2252 if (vect_debug_details (NULL))
2253 fprintf (dump_file, "transform assignment.");
2255 /* Handle def. */
2256 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2258 /* Handle use. */
2259 op = TREE_OPERAND (stmt, 1);
2260 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2262 /* Arguments are ready. create the new vector stmt. */
2263 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2264 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2265 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2266 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2268 return true;
2272 /* Function vectorizable_operation.
2274 Check if STMT performs a binary or unary operation that can be vectorized.
2275 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2276 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2277 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2279 static bool
2280 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2282 tree vec_dest;
2283 tree scalar_dest;
2284 tree operation;
2285 tree op0, op1 = NULL;
2286 tree vec_oprnd0, vec_oprnd1=NULL;
2287 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2289 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2290 int i;
2291 enum tree_code code;
2292 enum machine_mode vec_mode;
2293 tree new_temp;
2294 int op_type;
2295 tree op;
2296 optab optab;
2298 /* Is STMT a vectorizable binary/unary operation? */
2299 if (TREE_CODE (stmt) != MODIFY_EXPR)
2300 return false;
2302 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2303 return false;
2305 operation = TREE_OPERAND (stmt, 1);
2306 code = TREE_CODE (operation);
2307 optab = optab_for_tree_code (code, vectype);
2309 /* Support only unary or binary operations. */
2310 op_type = TREE_CODE_LENGTH (code);
2311 if (op_type != unary_op && op_type != binary_op)
2313 if (vect_debug_details (NULL))
2314 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2315 return false;
2318 for (i = 0; i < op_type; i++)
2320 op = TREE_OPERAND (operation, i);
2321 if (!vect_is_simple_use (op, loop, NULL))
2323 if (vect_debug_details (NULL))
2324 fprintf (dump_file, "use not simple.");
2325 return false;
2329 /* Supportable by target? */
2330 if (!optab)
2332 if (vect_debug_details (NULL))
2333 fprintf (dump_file, "no optab.");
2334 return false;
2336 vec_mode = TYPE_MODE (vectype);
2337 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2339 if (vect_debug_details (NULL))
2340 fprintf (dump_file, "op not supported by target.");
2341 return false;
2344 if (!vec_stmt) /* transformation not required. */
2346 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2347 return true;
2350 /** Transform. **/
2352 if (vect_debug_details (NULL))
2353 fprintf (dump_file, "transform binary/unary operation.");
2355 /* Handle def. */
2356 scalar_dest = TREE_OPERAND (stmt, 0);
2357 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2359 /* Handle uses. */
2360 op0 = TREE_OPERAND (operation, 0);
2361 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2363 if (op_type == binary_op)
2365 op1 = TREE_OPERAND (operation, 1);
2366 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2369 /* Arguments are ready. create the new vector stmt. */
2371 if (op_type == binary_op)
2372 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2373 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2374 else
2375 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2376 build1 (code, vectype, vec_oprnd0));
2377 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2378 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2379 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2381 return true;
2385 /* Function vectorizable_store.
2387 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2388 can be vectorized.
2389 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2390 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2391 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2393 static bool
2394 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2396 tree scalar_dest;
2397 tree data_ref;
2398 tree op;
2399 tree vec_oprnd1;
2400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2401 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2402 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2403 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2404 enum machine_mode vec_mode;
2405 tree dummy;
2406 enum dr_alignment_support alignment_support_cheme;
2408 /* Is vectorizable store? */
2410 if (TREE_CODE (stmt) != MODIFY_EXPR)
2411 return false;
2413 scalar_dest = TREE_OPERAND (stmt, 0);
2414 if (TREE_CODE (scalar_dest) != ARRAY_REF
2415 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2416 return false;
2418 op = TREE_OPERAND (stmt, 1);
2419 if (!vect_is_simple_use (op, loop, NULL))
2421 if (vect_debug_details (NULL))
2422 fprintf (dump_file, "use not simple.");
2423 return false;
2426 vec_mode = TYPE_MODE (vectype);
2427 /* FORNOW. In some cases can vectorize even if data-type not supported
2428 (e.g. - array initialization with 0). */
2429 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2430 return false;
2432 if (!STMT_VINFO_DATA_REF (stmt_info))
2433 return false;
2436 if (!vec_stmt) /* transformation not required. */
2438 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2439 return true;
2442 /** Trasform. **/
2444 if (vect_debug_details (NULL))
2445 fprintf (dump_file, "transform store");
2447 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2448 gcc_assert (alignment_support_cheme);
2449 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2451 /* Handle use - get the vectorized def from the defining stmt. */
2452 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2454 /* Handle def. */
2455 /* FORNOW: make sure the data reference is aligned. */
2456 vect_align_data_ref (stmt);
2457 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2458 data_ref = build_fold_indirect_ref (data_ref);
2460 /* Arguments are ready. create the new vector stmt. */
2461 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2462 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2464 return true;
2468 /* vectorizable_load.
2470 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2471 can be vectorized.
2472 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2473 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2474 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2476 static bool
2477 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2479 tree scalar_dest;
2480 tree vec_dest = NULL;
2481 tree data_ref = NULL;
2482 tree op;
2483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2485 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2486 tree new_temp;
2487 int mode;
2488 tree init_addr;
2489 tree new_stmt;
2490 tree dummy;
2491 basic_block new_bb;
2492 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2493 edge pe = loop_preheader_edge (loop);
2494 enum dr_alignment_support alignment_support_cheme;
2496 /* Is vectorizable load? */
2498 if (TREE_CODE (stmt) != MODIFY_EXPR)
2499 return false;
2501 scalar_dest = TREE_OPERAND (stmt, 0);
2502 if (TREE_CODE (scalar_dest) != SSA_NAME)
2503 return false;
2505 op = TREE_OPERAND (stmt, 1);
2506 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2507 return false;
2509 if (!STMT_VINFO_DATA_REF (stmt_info))
2510 return false;
2512 mode = (int) TYPE_MODE (vectype);
2514 /* FORNOW. In some cases can vectorize even if data-type not supported
2515 (e.g. - data copies). */
2516 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2518 if (vect_debug_details (loop))
2519 fprintf (dump_file, "Aligned load, but unsupported type.");
2520 return false;
2523 if (!vec_stmt) /* transformation not required. */
2525 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2526 return true;
2529 /** Trasform. **/
2531 if (vect_debug_details (NULL))
2532 fprintf (dump_file, "transform load.");
2534 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2535 gcc_assert (alignment_support_cheme);
2537 if (alignment_support_cheme == dr_aligned
2538 || alignment_support_cheme == dr_unaligned_supported)
2540 /* Create:
2541 p = initial_addr;
2542 indx = 0;
2543 loop {
2544 vec_dest = *(p);
2545 indx = indx + 1;
2549 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2550 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2551 if (aligned_access_p (dr))
2552 data_ref = build_fold_indirect_ref (data_ref);
2553 else
2555 int mis = DR_MISALIGNMENT (dr);
2556 tree tmis = (mis == -1 ?
2557 integer_zero_node :
2558 build_int_cst (integer_type_node, mis));
2559 tmis = int_const_binop (MULT_EXPR, tmis,
2560 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2561 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2563 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2564 new_temp = make_ssa_name (vec_dest, new_stmt);
2565 TREE_OPERAND (new_stmt, 0) = new_temp;
2566 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2568 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2570 /* Create:
2571 p1 = initial_addr;
2572 msq_init = *(floor(p1))
2573 p2 = initial_addr + VS - 1;
2574 magic = have_builtin ? builtin_result : initial_address;
2575 indx = 0;
2576 loop {
2577 p2' = p2 + indx * vectype_size
2578 lsq = *(floor(p2'))
2579 vec_dest = realign_load (msq, lsq, magic)
2580 indx = indx + 1;
2581 msq = lsq;
2585 tree offset;
2586 tree magic;
2587 tree phi_stmt;
2588 tree msq_init;
2589 tree msq, lsq;
2590 tree dataref_ptr;
2591 tree params;
2593 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2594 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2595 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2596 &init_addr, true);
2597 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2598 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2599 new_temp = make_ssa_name (vec_dest, new_stmt);
2600 TREE_OPERAND (new_stmt, 0) = new_temp;
2601 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2602 gcc_assert (!new_bb);
2603 msq_init = TREE_OPERAND (new_stmt, 0);
2606 /* <2> Create lsq = *(floor(p2')) in the loop */
2607 offset = build_int_cst (integer_type_node,
2608 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2609 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2610 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2611 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2612 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2613 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2614 new_temp = make_ssa_name (vec_dest, new_stmt);
2615 TREE_OPERAND (new_stmt, 0) = new_temp;
2616 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2617 lsq = TREE_OPERAND (new_stmt, 0);
2620 /* <3> */
2621 if (targetm.vectorize.builtin_mask_for_load)
2623 /* Create permutation mask, if required, in loop preheader. */
2624 tree builtin_decl;
2625 params = build_tree_list (NULL_TREE, init_addr);
2626 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2627 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2628 new_stmt = build_function_call_expr (builtin_decl, params);
2629 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2630 new_temp = make_ssa_name (vec_dest, new_stmt);
2631 TREE_OPERAND (new_stmt, 0) = new_temp;
2632 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2633 gcc_assert (!new_bb);
2634 magic = TREE_OPERAND (new_stmt, 0);
2636 else
2638 /* Use current address instead of init_addr for reduced reg pressure.
2640 magic = dataref_ptr;
2644 /* <4> Create msq = phi <msq_init, lsq> in loop */
2645 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2646 msq = make_ssa_name (vec_dest, NULL_TREE);
2647 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2648 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2649 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2650 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2653 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2654 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2655 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2656 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2657 new_temp = make_ssa_name (vec_dest, new_stmt);
2658 TREE_OPERAND (new_stmt, 0) = new_temp;
2659 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2661 else
2662 gcc_unreachable ();
2664 *vec_stmt = new_stmt;
2665 return true;
2669 /* Function vect_supportable_dr_alignment
2671 Return whether the data reference DR is supported with respect to its
2672 alignment. */
2674 static enum dr_alignment_support
2675 vect_supportable_dr_alignment (struct data_reference *dr)
2677 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2678 enum machine_mode mode = (int) TYPE_MODE (vectype);
2680 if (aligned_access_p (dr))
2681 return dr_aligned;
2683 /* Possibly unaligned access. */
2685 if (DR_IS_READ (dr))
2687 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2688 && (!targetm.vectorize.builtin_mask_for_load
2689 || targetm.vectorize.builtin_mask_for_load ()))
2690 return dr_unaligned_software_pipeline;
2692 if (targetm.vectorize.misaligned_mem_ok (mode))
2693 /* Can't software pipeline the loads. */
2694 return dr_unaligned_supported;
2697 /* Unsupported. */
2698 return dr_unaligned_unsupported;
2702 /* Function vect_transform_stmt.
2704 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2706 static bool
2707 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2709 bool is_store = false;
2710 tree vec_stmt = NULL_TREE;
2711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2712 bool done;
2714 switch (STMT_VINFO_TYPE (stmt_info))
2716 case op_vec_info_type:
2717 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2718 gcc_assert (done);
2719 break;
2721 case assignment_vec_info_type:
2722 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2723 gcc_assert (done);
2724 break;
2726 case load_vec_info_type:
2727 done = vectorizable_load (stmt, bsi, &vec_stmt);
2728 gcc_assert (done);
2729 break;
2731 case store_vec_info_type:
2732 done = vectorizable_store (stmt, bsi, &vec_stmt);
2733 gcc_assert (done);
2734 is_store = true;
2735 break;
2736 default:
2737 if (vect_debug_details (NULL))
2738 fprintf (dump_file, "stmt not supported.");
2739 gcc_unreachable ();
2742 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2744 return is_store;
2748 /* This function builds ni_name = number of iterations loop executes
2749 on the loop preheader. */
2751 static tree
2752 vect_build_loop_niters (loop_vec_info loop_vinfo)
2754 tree ni_name, stmt, var;
2755 edge pe;
2756 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2757 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2759 var = create_tmp_var (TREE_TYPE (ni), "niters");
2760 add_referenced_tmp_var (var);
2761 ni_name = force_gimple_operand (ni, &stmt, false, var);
2763 pe = loop_preheader_edge (loop);
2764 if (stmt)
2766 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2767 gcc_assert (!new_bb);
2770 return ni_name;
2774 /* This function generates the following statements:
2776 ni_name = number of iterations loop executes
2777 ratio = ni_name / vf
2778 ratio_mult_vf_name = ratio * vf
2780 and places them at the loop preheader edge. */
2782 static void
2783 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2784 tree *ni_name_ptr,
2785 tree *ratio_mult_vf_name_ptr,
2786 tree *ratio_name_ptr)
2789 edge pe;
2790 basic_block new_bb;
2791 tree stmt, ni_name;
2792 tree var;
2793 tree ratio_name;
2794 tree ratio_mult_vf_name;
2795 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2796 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2797 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2798 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2800 pe = loop_preheader_edge (loop);
2802 /* Generate temporary variable that contains
2803 number of iterations loop executes. */
2805 ni_name = vect_build_loop_niters (loop_vinfo);
2807 /* Create: ratio = ni >> log2(vf) */
2809 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2810 add_referenced_tmp_var (var);
2811 ratio_name = make_ssa_name (var, NULL_TREE);
2812 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2813 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2814 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2816 pe = loop_preheader_edge (loop);
2817 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2818 gcc_assert (!new_bb);
2820 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2822 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2823 add_referenced_tmp_var (var);
2824 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2825 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2826 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2827 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2829 pe = loop_preheader_edge (loop);
2830 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2831 gcc_assert (!new_bb);
2833 *ni_name_ptr = ni_name;
2834 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2835 *ratio_name_ptr = ratio_name;
2837 return;
2841 /* Function vect_update_ivs_after_vectorizer.
2843 "Advance" the induction variables of LOOP to the value they should take
2844 after the execution of LOOP. This is currently necessary because the
2845 vectorizer does not handle induction variables that are used after the
2846 loop. Such a situation occurs when the last iterations of LOOP are
2847 peeled, because:
2848 1. We introduced new uses after LOOP for IVs that were not originally used
2849 after LOOP: the IVs of LOOP are now used by an epilog loop.
2850 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2851 times, whereas the loop IVs should be bumped N times.
2853 Input:
2854 - LOOP - a loop that is going to be vectorized. The last few iterations
2855 of LOOP were peeled.
2856 - NITERS - the number of iterations that LOOP executes (before it is
2857 vectorized). i.e, the number of times the ivs should be bumped.
2858 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2859 coming out from LOOP on which there are uses of the LOOP ivs
2860 (this is the path from LOOP->exit to epilog_loop->preheader).
2862 The new definitions of the ivs are placed in LOOP->exit.
2863 The phi args associated with the edge UPDATE_E in the bb
2864 UPDATE_E->dest are updated accordingly.
2866 Assumption 1: Like the rest of the vectorizer, this function assumes
2867 a single loop exit that has a single predecessor.
2869 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2870 organized in the same order.
2872 Assumption 3: The access function of the ivs is simple enough (see
2873 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2875 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2876 coming out of LOOP on which the ivs of LOOP are used (this is the path
2877 that leads to the epilog loop; other paths skip the epilog loop). This
2878 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2879 needs to have its phis updated.
2882 static void
2883 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2885 basic_block exit_bb = loop->exit_edges[0]->dest;
2886 tree phi, phi1;
2887 basic_block update_bb = update_e->dest;
2889 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2891 /* Make sure there exists a single-predecessor exit bb: */
2892 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2894 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2895 phi && phi1;
2896 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2898 tree access_fn = NULL;
2899 tree evolution_part;
2900 tree init_expr;
2901 tree step_expr;
2902 tree var, stmt, ni, ni_name;
2903 block_stmt_iterator last_bsi;
2905 /* Skip virtual phi's. */
2906 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2908 if (vect_debug_details (NULL))
2909 fprintf (dump_file, "virtual phi. skip.");
2910 continue;
2913 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2914 gcc_assert (access_fn);
2915 evolution_part =
2916 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2917 gcc_assert (evolution_part != NULL_TREE);
2919 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2920 of degree >= 2 or exponential. */
2921 gcc_assert (!tree_is_chrec (evolution_part));
2923 step_expr = evolution_part;
2924 init_expr = unshare_expr (initial_condition (access_fn));
2926 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2927 build2 (MULT_EXPR, TREE_TYPE (niters),
2928 niters, step_expr), init_expr);
2930 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2931 add_referenced_tmp_var (var);
2933 ni_name = force_gimple_operand (ni, &stmt, false, var);
2935 /* Insert stmt into exit_bb. */
2936 last_bsi = bsi_last (exit_bb);
2937 if (stmt)
2938 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2940 /* Fix phi expressions in the successor bb. */
2941 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2942 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2943 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2948 /* Function vect_do_peeling_for_loop_bound
2950 Peel the last iterations of the loop represented by LOOP_VINFO.
2951 The peeled iterations form a new epilog loop. Given that the loop now
2952 iterates NITERS times, the new epilog loop iterates
2953 NITERS % VECTORIZATION_FACTOR times.
2955 The original loop will later be made to iterate
2956 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2958 static void
2959 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2960 struct loops *loops)
2963 tree ni_name, ratio_mult_vf_name;
2964 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2965 struct loop *new_loop;
2966 edge update_e;
2967 #ifdef ENABLE_CHECKING
2968 int loop_num;
2969 #endif
2971 if (vect_debug_details (NULL))
2972 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2974 /* Generate the following variables on the preheader of original loop:
2976 ni_name = number of iteration the original loop executes
2977 ratio = ni_name / vf
2978 ratio_mult_vf_name = ratio * vf */
2979 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2980 &ratio_mult_vf_name, ratio);
2982 /* Update loop info. */
2983 loop->pre_header = loop_preheader_edge (loop)->src;
2984 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2986 #ifdef ENABLE_CHECKING
2987 loop_num = loop->num;
2988 #endif
2989 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
2990 ratio_mult_vf_name, ni_name, false);
2991 #ifdef ENABLE_CHECKING
2992 gcc_assert (new_loop);
2993 gcc_assert (loop_num == loop->num);
2994 slpeel_verify_cfg_after_peeling (loop, new_loop);
2995 #endif
2997 /* A guard that controls whether the new_loop is to be executed or skipped
2998 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2999 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3000 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3001 is on the path where the LOOP IVs are used and need to be updated. */
3003 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3004 update_e = EDGE_PRED (new_loop->pre_header, 0);
3005 else
3006 update_e = EDGE_PRED (new_loop->pre_header, 1);
3008 /* Update IVs of original loop as if they were advanced
3009 by ratio_mult_vf_name steps. */
3010 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3012 /* After peeling we have to reset scalar evolution analyzer. */
3013 scev_reset ();
3015 return;
3019 /* Function vect_gen_niters_for_prolog_loop
3021 Set the number of iterations for the loop represented by LOOP_VINFO
3022 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3023 and the misalignment of DR - the first data reference recorded in
3024 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3025 this loop, the data reference DR will refer to an aligned location.
3027 The following computation is generated:
3029 compute address misalignment in bytes:
3030 addr_mis = addr & (vectype_size - 1)
3032 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3034 (elem_size = element type size; an element is the scalar element
3035 whose type is the inner type of the vectype) */
3037 static tree
3038 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3040 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3041 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3042 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3043 tree var, stmt;
3044 tree iters, iters_name;
3045 edge pe;
3046 basic_block new_bb;
3047 tree dr_stmt = DR_STMT (dr);
3048 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3049 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3050 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3051 tree elem_misalign;
3052 tree byte_misalign;
3053 tree new_stmts = NULL_TREE;
3054 tree start_addr =
3055 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3056 tree ptr_type = TREE_TYPE (start_addr);
3057 tree size = TYPE_SIZE (ptr_type);
3058 tree type = lang_hooks.types.type_for_size (TREE_INT_CST_LOW (size), 1);
3059 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3060 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3061 tree niters_type = TREE_TYPE (loop_niters);
3062 tree elem_size_log =
3063 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3064 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3066 pe = loop_preheader_edge (loop);
3067 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3068 gcc_assert (!new_bb);
3070 /* Create: byte_misalign = addr & (vectype_size - 1) */
3071 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3073 /* Create: elem_misalign = byte_misalign / element_size */
3074 elem_misalign =
3075 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3077 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3078 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3079 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3080 iters = fold_convert (niters_type, iters);
3082 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3083 /* If the loop bound is known at compile time we already verified that it is
3084 greater than vf; since the misalignment ('iters') is at most vf, there's
3085 no need to generate the MIN_EXPR in this case. */
3086 if (!host_integerp (loop_niters, 0))
3087 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3089 var = create_tmp_var (niters_type, "prolog_loop_niters");
3090 add_referenced_tmp_var (var);
3091 iters_name = force_gimple_operand (iters, &stmt, false, var);
3093 /* Insert stmt on loop preheader edge. */
3094 pe = loop_preheader_edge (loop);
3095 if (stmt)
3097 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3098 gcc_assert (!new_bb);
3101 return iters_name;
3105 /* Function vect_update_inits_of_dr
3107 NITERS iterations were peeled from LOOP. DR represents a data reference
3108 in LOOP. This function updates the information recorded in DR to
3109 account for the fact that the first NITERS iterations had already been
3110 executed. Specifically, it updates the initial_condition of the
3111 access_function of DR. */
3113 static void
3114 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3115 tree niters)
3117 tree access_fn = DR_ACCESS_FN (dr, 0);
3118 tree init, init_new, step;
3120 step = evolution_part_in_loop_num (access_fn, loop->num);
3121 init = initial_condition (access_fn);
3123 init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3124 build2 (MULT_EXPR, TREE_TYPE (niters),
3125 niters, step), init);
3126 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3128 return;
3132 /* Function vect_update_inits_of_drs
3134 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3135 This function updates the information recorded for the data references in
3136 the loop to account for the fact that the first NITERS iterations had
3137 already been executed. Specifically, it updates the initial_condition of the
3138 access_function of all the data_references in the loop. */
3140 static void
3141 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3143 unsigned int i;
3144 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3145 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3146 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3148 if (dump_file && (dump_flags & TDF_DETAILS))
3149 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3151 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3153 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3154 vect_update_inits_of_dr (dr, loop, niters);
3157 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3159 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3160 vect_update_inits_of_dr (dr, loop, niters);
3165 /* Function vect_do_peeling_for_alignment
3167 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3168 'niters' is set to the misalignment of one of the data references in the
3169 loop, thereby forcing it to refer to an aligned location at the beginning
3170 of the execution of this loop. The data reference for which we are
3171 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3173 static void
3174 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3176 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3177 tree niters_of_prolog_loop, ni_name;
3178 tree n_iters;
3179 struct loop *new_loop;
3181 if (vect_debug_details (NULL))
3182 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3184 ni_name = vect_build_loop_niters (loop_vinfo);
3185 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3187 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3188 new_loop =
3189 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3190 niters_of_prolog_loop, ni_name, true);
3191 #ifdef ENABLE_CHECKING
3192 gcc_assert (new_loop);
3193 slpeel_verify_cfg_after_peeling (new_loop, loop);
3194 #endif
3196 /* Update number of times loop executes. */
3197 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3198 LOOP_VINFO_NITERS (loop_vinfo) =
3199 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3201 /* Update the init conditions of the access functions of all data refs. */
3202 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3204 /* After peeling we have to reset scalar evolution analyzer. */
3205 scev_reset ();
3207 return;
3211 /* Function vect_transform_loop.
3213 The analysis phase has determined that the loop is vectorizable.
3214 Vectorize the loop - created vectorized stmts to replace the scalar
3215 stmts in the loop, and update the loop exit condition. */
3217 static void
3218 vect_transform_loop (loop_vec_info loop_vinfo,
3219 struct loops *loops ATTRIBUTE_UNUSED)
3221 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3222 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3223 int nbbs = loop->num_nodes;
3224 block_stmt_iterator si;
3225 int i;
3226 tree ratio = NULL;
3227 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3229 if (vect_debug_details (NULL))
3230 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3233 /* Peel the loop if there are data refs with unknown alignment.
3234 Only one data ref with unknown store is allowed. */
3236 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3237 vect_do_peeling_for_alignment (loop_vinfo, loops);
3239 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3240 compile time constant), or it is a constant that doesn't divide by the
3241 vectorization factor, then an epilog loop needs to be created.
3242 We therefore duplicate the loop: the original loop will be vectorized,
3243 and will compute the first (n/VF) iterations. The second copy of the loop
3244 will remain scalar and will compute the remaining (n%VF) iterations.
3245 (VF is the vectorization factor). */
3247 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3248 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3249 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3250 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3251 else
3252 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3253 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3255 /* 1) Make sure the loop header has exactly two entries
3256 2) Make sure we have a preheader basic block. */
3258 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3260 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3263 /* FORNOW: the vectorizer supports only loops which body consist
3264 of one basic block (header + empty latch). When the vectorizer will
3265 support more involved loop forms, the order by which the BBs are
3266 traversed need to be reconsidered. */
3268 for (i = 0; i < nbbs; i++)
3270 basic_block bb = bbs[i];
3272 for (si = bsi_start (bb); !bsi_end_p (si);)
3274 tree stmt = bsi_stmt (si);
3275 stmt_vec_info stmt_info;
3276 bool is_store;
3278 if (vect_debug_details (NULL))
3280 fprintf (dump_file, "------>vectorizing statement: ");
3281 print_generic_expr (dump_file, stmt, TDF_SLIM);
3283 stmt_info = vinfo_for_stmt (stmt);
3284 gcc_assert (stmt_info);
3285 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3287 bsi_next (&si);
3288 continue;
3290 #ifdef ENABLE_CHECKING
3291 /* FORNOW: Verify that all stmts operate on the same number of
3292 units and no inner unrolling is necessary. */
3293 gcc_assert
3294 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3295 == vectorization_factor);
3296 #endif
3297 /* -------- vectorize statement ------------ */
3298 if (vect_debug_details (NULL))
3299 fprintf (dump_file, "transform statement.");
3301 is_store = vect_transform_stmt (stmt, &si);
3302 if (is_store)
3304 /* free the attached stmt_vec_info and remove the stmt. */
3305 stmt_ann_t ann = stmt_ann (stmt);
3306 free (stmt_info);
3307 set_stmt_info (ann, NULL);
3308 bsi_remove (&si);
3309 continue;
3312 bsi_next (&si);
3313 } /* stmts in BB */
3314 } /* BBs in loop */
3316 slpeel_make_loop_iterate_ntimes (loop, ratio);
3318 if (vect_debug_details (loop))
3319 fprintf (dump_file,"Success! loop vectorized.");
3320 if (vect_debug_stats (loop))
3321 fprintf (dump_file, "LOOP VECTORIZED.");
3325 /* Function vect_is_simple_use.
3327 Input:
3328 LOOP - the loop that is being vectorized.
3329 OPERAND - operand of a stmt in LOOP.
3330 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3332 Returns whether a stmt with OPERAND can be vectorized.
3333 Supportable operands are constants, loop invariants, and operands that are
3334 defined by the current iteration of the loop. Unsupportable operands are
3335 those that are defined by a previous iteration of the loop (as is the case
3336 in reduction/induction computations). */
3338 static bool
3339 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3341 tree def_stmt;
3342 basic_block bb;
3344 if (def)
3345 *def = NULL_TREE;
3347 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3348 return true;
3350 if (TREE_CODE (operand) != SSA_NAME)
3351 return false;
3353 def_stmt = SSA_NAME_DEF_STMT (operand);
3354 if (def_stmt == NULL_TREE )
3356 if (vect_debug_details (NULL))
3357 fprintf (dump_file, "no def_stmt.");
3358 return false;
3361 /* empty stmt is expected only in case of a function argument.
3362 (Otherwise - we expect a phi_node or a modify_expr). */
3363 if (IS_EMPTY_STMT (def_stmt))
3365 tree arg = TREE_OPERAND (def_stmt, 0);
3366 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3367 return true;
3368 if (vect_debug_details (NULL))
3370 fprintf (dump_file, "Unexpected empty stmt: ");
3371 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3373 return false;
3376 /* phi_node inside the loop indicates an induction/reduction pattern.
3377 This is not supported yet. */
3378 bb = bb_for_stmt (def_stmt);
3379 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3381 if (vect_debug_details (NULL))
3382 fprintf (dump_file, "reduction/induction - unsupported.");
3383 return false; /* FORNOW: not supported yet. */
3386 /* Expecting a modify_expr or a phi_node. */
3387 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3388 || TREE_CODE (def_stmt) == PHI_NODE)
3390 if (def)
3391 *def = def_stmt;
3392 return true;
3395 return false;
3399 /* Function vect_analyze_operations.
3401 Scan the loop stmts and make sure they are all vectorizable. */
3403 static bool
3404 vect_analyze_operations (loop_vec_info loop_vinfo)
3406 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3407 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3408 int nbbs = loop->num_nodes;
3409 block_stmt_iterator si;
3410 unsigned int vectorization_factor = 0;
3411 int i;
3412 bool ok;
3413 tree scalar_type;
3415 if (vect_debug_details (NULL))
3416 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3418 for (i = 0; i < nbbs; i++)
3420 basic_block bb = bbs[i];
3422 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3424 tree stmt = bsi_stmt (si);
3425 unsigned int nunits;
3426 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3427 tree vectype;
3429 if (vect_debug_details (NULL))
3431 fprintf (dump_file, "==> examining statement: ");
3432 print_generic_expr (dump_file, stmt, TDF_SLIM);
3435 gcc_assert (stmt_info);
3437 /* skip stmts which do not need to be vectorized.
3438 this is expected to include:
3439 - the COND_EXPR which is the loop exit condition
3440 - any LABEL_EXPRs in the loop
3441 - computations that are used only for array indexing or loop
3442 control */
3444 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3446 if (vect_debug_details (NULL))
3447 fprintf (dump_file, "irrelevant.");
3448 continue;
3451 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3453 if (vect_debug_stats (loop) || vect_debug_details (loop))
3455 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3456 print_generic_expr (dump_file, stmt, TDF_SLIM);
3458 return false;
3461 if (STMT_VINFO_DATA_REF (stmt_info))
3462 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3463 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3464 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3465 else
3466 scalar_type = TREE_TYPE (stmt);
3468 if (vect_debug_details (NULL))
3470 fprintf (dump_file, "get vectype for scalar type: ");
3471 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3474 vectype = get_vectype_for_scalar_type (scalar_type);
3475 if (!vectype)
3477 if (vect_debug_stats (loop) || vect_debug_details (loop))
3479 fprintf (dump_file, "not vectorized: unsupported data-type ");
3480 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3482 return false;
3485 if (vect_debug_details (NULL))
3487 fprintf (dump_file, "vectype: ");
3488 print_generic_expr (dump_file, vectype, TDF_SLIM);
3490 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3492 ok = (vectorizable_operation (stmt, NULL, NULL)
3493 || vectorizable_assignment (stmt, NULL, NULL)
3494 || vectorizable_load (stmt, NULL, NULL)
3495 || vectorizable_store (stmt, NULL, NULL));
3497 if (!ok)
3499 if (vect_debug_stats (loop) || vect_debug_details (loop))
3501 fprintf (dump_file, "not vectorized: stmt not supported: ");
3502 print_generic_expr (dump_file, stmt, TDF_SLIM);
3504 return false;
3507 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3508 if (vect_debug_details (NULL))
3509 fprintf (dump_file, "nunits = %d", nunits);
3511 if (vectorization_factor)
3513 /* FORNOW: don't allow mixed units.
3514 This restriction will be relaxed in the future. */
3515 if (nunits != vectorization_factor)
3517 if (vect_debug_stats (loop) || vect_debug_details (loop))
3518 fprintf (dump_file, "not vectorized: mixed data-types");
3519 return false;
3522 else
3523 vectorization_factor = nunits;
3525 #ifdef ENABLE_CHECKING
3526 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3527 * vectorization_factor == UNITS_PER_SIMD_WORD);
3528 #endif
3532 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3534 if (vectorization_factor <= 1)
3536 if (vect_debug_stats (loop) || vect_debug_details (loop))
3537 fprintf (dump_file, "not vectorized: unsupported data-type");
3538 return false;
3540 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3542 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3543 fprintf (dump_file,
3544 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3545 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3547 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3548 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3550 if (vect_debug_stats (loop) || vect_debug_details (loop))
3551 fprintf (dump_file, "not vectorized: iteration count too small.");
3552 return false;
3555 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3556 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3558 if (vect_debug_stats (loop) || vect_debug_details (loop))
3559 fprintf (dump_file, "epilog loop required.");
3560 if (!vect_can_advance_ivs_p (loop))
3562 if (vect_debug_stats (loop) || vect_debug_details (loop))
3563 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3564 return false;
3566 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3568 if (vect_debug_stats (loop) || vect_debug_details (loop))
3569 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3570 return false;
3574 return true;
3578 /* Function exist_non_indexing_operands_for_use_p
3580 USE is one of the uses attached to STMT. Check if USE is
3581 used in STMT for anything other than indexing an array. */
3583 static bool
3584 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3586 tree operand;
3587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3589 /* USE corresponds to some operand in STMT. If there is no data
3590 reference in STMT, then any operand that corresponds to USE
3591 is not indexing an array. */
3592 if (!STMT_VINFO_DATA_REF (stmt_info))
3593 return true;
3595 /* STMT has a data_ref. FORNOW this means that its of one of
3596 the following forms:
3597 -1- ARRAY_REF = var
3598 -2- var = ARRAY_REF
3599 (This should have been verified in analyze_data_refs).
3601 'var' in the second case corresponds to a def, not a use,
3602 so USE cannot correspond to any operands that are not used
3603 for array indexing.
3605 Therefore, all we need to check is if STMT falls into the
3606 first case, and whether var corresponds to USE. */
3608 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3609 return false;
3611 operand = TREE_OPERAND (stmt, 1);
3613 if (TREE_CODE (operand) != SSA_NAME)
3614 return false;
3616 if (operand == use)
3617 return true;
3619 return false;
3623 /* Function vect_is_simple_iv_evolution.
3625 FORNOW: A simple evolution of an induction variables in the loop is
3626 considered a polynomial evolution with constant step. */
3628 static bool
3629 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3630 tree * step, bool strict)
3632 tree init_expr;
3633 tree step_expr;
3635 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3637 /* When there is no evolution in this loop, the evolution function
3638 is not "simple". */
3639 if (evolution_part == NULL_TREE)
3640 return false;
3642 /* When the evolution is a polynomial of degree >= 2
3643 the evolution function is not "simple". */
3644 if (tree_is_chrec (evolution_part))
3645 return false;
3647 step_expr = evolution_part;
3648 init_expr = unshare_expr (initial_condition (access_fn));
3650 if (vect_debug_details (NULL))
3652 fprintf (dump_file, "step: ");
3653 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3654 fprintf (dump_file, ", init: ");
3655 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3658 *init = init_expr;
3659 *step = step_expr;
3661 if (TREE_CODE (step_expr) != INTEGER_CST)
3663 if (vect_debug_details (NULL))
3664 fprintf (dump_file, "step unknown.");
3665 return false;
3668 if (strict)
3669 if (!integer_onep (step_expr))
3671 if (vect_debug_details (NULL))
3672 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3673 return false;
3676 return true;
3680 /* Function vect_analyze_scalar_cycles.
3682 Examine the cross iteration def-use cycles of scalar variables, by
3683 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3684 cycles that they represent do not impede vectorization.
3686 FORNOW: Reduction as in the following loop, is not supported yet:
3687 loop1:
3688 for (i=0; i<N; i++)
3689 sum += a[i];
3690 The cross-iteration cycle corresponding to variable 'sum' will be
3691 considered too complicated and will impede vectorization.
3693 FORNOW: Induction as in the following loop, is not supported yet:
3694 loop2:
3695 for (i=0; i<N; i++)
3696 a[i] = i;
3698 However, the following loop *is* vectorizable:
3699 loop3:
3700 for (i=0; i<N; i++)
3701 a[i] = b[i];
3703 In both loops there exists a def-use cycle for the variable i:
3704 loop: i_2 = PHI (i_0, i_1)
3705 a[i_2] = ...;
3706 i_1 = i_2 + 1;
3707 GOTO loop;
3709 The evolution of the above cycle is considered simple enough,
3710 however, we also check that the cycle does not need to be
3711 vectorized, i.e - we check that the variable that this cycle
3712 defines is only used for array indexing or in stmts that do not
3713 need to be vectorized. This is not the case in loop2, but it
3714 *is* the case in loop3. */
3716 static bool
3717 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3719 tree phi;
3720 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3721 basic_block bb = loop->header;
3722 tree dummy;
3724 if (vect_debug_details (NULL))
3725 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3727 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3729 tree access_fn = NULL;
3731 if (vect_debug_details (NULL))
3733 fprintf (dump_file, "Analyze phi: ");
3734 print_generic_expr (dump_file, phi, TDF_SLIM);
3737 /* Skip virtual phi's. The data dependences that are associated with
3738 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3740 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3742 if (vect_debug_details (NULL))
3743 fprintf (dump_file, "virtual phi. skip.");
3744 continue;
3747 /* Analyze the evolution function. */
3749 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3750 those of loop induction variables; This property is verified here.
3752 Furthermore, if that induction variable is used in an operation
3753 that needs to be vectorized (i.e, is not solely used to index
3754 arrays and check the exit condition) - we do not support its
3755 vectorization yet. This property is verified in vect_is_simple_use,
3756 during vect_analyze_operations. */
3758 access_fn = /* instantiate_parameters
3759 (loop,*/
3760 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3762 if (!access_fn)
3764 if (vect_debug_stats (loop) || vect_debug_details (loop))
3765 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3766 return false;
3769 if (vect_debug_details (NULL))
3771 fprintf (dump_file, "Access function of PHI: ");
3772 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3775 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3776 &dummy, false))
3778 if (vect_debug_stats (loop) || vect_debug_details (loop))
3779 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3780 return false;
3784 return true;
3788 /* Function vect_analyze_data_ref_dependence.
3790 Return TRUE if there (might) exist a dependence between a memory-reference
3791 DRA and a memory-reference DRB. */
3793 static bool
3794 vect_analyze_data_ref_dependence (struct data_reference *dra,
3795 struct data_reference *drb,
3796 struct loop *loop)
3798 bool differ_p;
3799 struct data_dependence_relation *ddr;
3801 if (!array_base_name_differ_p (dra, drb, &differ_p))
3803 if (vect_debug_stats (loop) || vect_debug_details (loop))
3805 fprintf (dump_file,
3806 "not vectorized: can't determine dependence between: ");
3807 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3808 fprintf (dump_file, " and ");
3809 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3811 return true;
3814 if (differ_p)
3815 return false;
3817 ddr = initialize_data_dependence_relation (dra, drb);
3818 compute_affine_dependence (ddr);
3820 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3821 return false;
3823 if (vect_debug_stats (loop) || vect_debug_details (loop))
3825 fprintf (dump_file,
3826 "not vectorized: possible dependence between data-refs ");
3827 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3828 fprintf (dump_file, " and ");
3829 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3832 return true;
3836 /* Function vect_analyze_data_ref_dependences.
3838 Examine all the data references in the loop, and make sure there do not
3839 exist any data dependences between them.
3841 TODO: dependences which distance is greater than the vectorization factor
3842 can be ignored. */
3844 static bool
3845 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3847 unsigned int i, j;
3848 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3849 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3850 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3852 /* Examine store-store (output) dependences. */
3854 if (vect_debug_details (NULL))
3855 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3857 if (vect_debug_details (NULL))
3858 fprintf (dump_file, "compare all store-store pairs.");
3860 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3862 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3864 struct data_reference *dra =
3865 VARRAY_GENERIC_PTR (loop_write_refs, i);
3866 struct data_reference *drb =
3867 VARRAY_GENERIC_PTR (loop_write_refs, j);
3868 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3869 return false;
3873 /* Examine load-store (true/anti) dependences. */
3875 if (vect_debug_details (NULL))
3876 fprintf (dump_file, "compare all load-store pairs.");
3878 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3880 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3882 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3883 struct data_reference *drb =
3884 VARRAY_GENERIC_PTR (loop_write_refs, j);
3885 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3886 return false;
3890 return true;
3894 /* Function vect_get_first_index.
3896 REF is a data reference.
3897 If it is an ARRAY_REF: if its lower bound is simple enough,
3898 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3899 If it is not an ARRAY_REF: REF has no "first index";
3900 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3902 static bool
3903 vect_get_first_index (tree ref, tree *array_first_index)
3905 tree array_start;
3907 if (TREE_CODE (ref) != ARRAY_REF)
3908 *array_first_index = size_zero_node;
3909 else
3911 array_start = array_ref_low_bound (ref);
3912 if (!host_integerp (array_start, 0))
3914 if (vect_debug_details (NULL))
3916 fprintf (dump_file, "array min val not simple integer cst.");
3917 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3919 return false;
3921 *array_first_index = array_start;
3924 return true;
3928 /* Function vect_compute_array_base_alignment.
3929 A utility function of vect_compute_array_ref_alignment.
3931 Compute the misalignment of ARRAY in bits.
3933 Input:
3934 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3935 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3936 if NULL: don't compute misalignment, just return the base of ARRAY.
3937 PREV_DIMENSIONS - initialized to one.
3938 MISALIGNMENT - the computed misalignment in bits.
3940 Output:
3941 If VECTYPE is not NULL:
3942 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3943 the base of the array, and put the computed misalignment in MISALIGNMENT.
3944 If VECTYPE is NULL:
3945 Return the base of the array.
3947 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3948 a[idx_N]...[idx_2][idx_1] is
3949 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3950 ... + idx_N * dim_0 * ... * dim_N-1}.
3951 (The misalignment of &a is not checked here).
3952 Note, that every term contains dim_0, therefore, if dim_0 is a
3953 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3954 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3955 NUINTS, we can say that the misalignment of the sum is equal to
3956 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3957 we can't determine this array misalignment, and we return
3958 false.
3959 We proceed recursively in this manner, accumulating total misalignment
3960 and the multiplication of previous dimensions for correct misalignment
3961 calculation. */
3963 static tree
3964 vect_compute_array_base_alignment (tree array,
3965 tree vectype,
3966 tree *prev_dimensions,
3967 tree *misalignment)
3969 tree index;
3970 tree domain;
3971 tree dimension_size;
3972 tree mis;
3973 tree bits_per_vectype;
3974 tree bits_per_vectype_unit;
3976 /* The 'stop condition' of the recursion. */
3977 if (TREE_CODE (array) != ARRAY_REF)
3978 return array;
3980 if (!vectype)
3981 /* Just get the base decl. */
3982 return vect_compute_array_base_alignment
3983 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3985 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3986 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3987 return NULL_TREE;
3989 domain = TYPE_DOMAIN (TREE_TYPE (array));
3990 dimension_size =
3991 int_const_binop (PLUS_EXPR,
3992 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3993 TYPE_MIN_VALUE (domain), 1),
3994 size_one_node, 1);
3996 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
3997 is a multiple of NUNITS:
3999 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4001 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4002 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4003 if (integer_zerop (mis))
4004 /* This array is aligned. Continue just in order to get the base decl. */
4005 return vect_compute_array_base_alignment
4006 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4008 index = TREE_OPERAND (array, 1);
4009 if (!host_integerp (index, 1))
4010 /* The current index is not constant. */
4011 return NULL_TREE;
4013 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4015 bits_per_vectype = fold_convert (unsigned_type_node,
4016 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4017 GET_MODE_SIZE (TYPE_MODE (vectype))));
4018 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4019 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4020 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4022 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4023 earlier:
4025 *misalignment =
4026 (*misalignment + index_val * dimension_size * *prev_dimensions)
4027 % vectype_nunits;
4030 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4031 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4032 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4033 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4034 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4037 *prev_dimensions = int_const_binop (MULT_EXPR,
4038 *prev_dimensions, dimension_size, 1);
4040 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4041 prev_dimensions,
4042 misalignment);
4046 /* Function vect_compute_data_ref_alignment
4048 Compute the misalignment of the data reference DR.
4050 Output:
4051 1. If during the misalignment computation it is found that the data reference
4052 cannot be vectorized then false is returned.
4053 2. DR_MISALIGNMENT (DR) is defined.
4055 FOR NOW: No analysis is actually performed. Misalignment is calculated
4056 only for trivial cases. TODO. */
4058 static bool
4059 vect_compute_data_ref_alignment (struct data_reference *dr,
4060 loop_vec_info loop_vinfo)
4062 tree stmt = DR_STMT (dr);
4063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4064 tree ref = DR_REF (dr);
4065 tree vectype;
4066 tree scalar_type;
4067 tree offset = size_zero_node;
4068 tree base, bit_offset, alignment;
4069 tree unit_bits = fold_convert (unsigned_type_node,
4070 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4071 tree dr_base;
4072 bool base_aligned_p;
4074 if (vect_debug_details (NULL))
4075 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4077 /* Initialize misalignment to unknown. */
4078 DR_MISALIGNMENT (dr) = -1;
4080 scalar_type = TREE_TYPE (ref);
4081 vectype = get_vectype_for_scalar_type (scalar_type);
4082 if (!vectype)
4084 if (vect_debug_details (NULL))
4086 fprintf (dump_file, "no vectype for stmt: ");
4087 print_generic_expr (dump_file, stmt, TDF_SLIM);
4088 fprintf (dump_file, " scalar_type: ");
4089 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4091 /* It is not possible to vectorize this data reference. */
4092 return false;
4094 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4095 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4097 if (TREE_CODE (ref) == ARRAY_REF)
4098 dr_base = ref;
4099 else
4100 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4102 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4103 loop_vinfo, &bit_offset, &base_aligned_p);
4104 if (!base)
4106 if (vect_debug_details (NULL))
4108 fprintf (dump_file, "Unknown alignment for access: ");
4109 print_generic_expr (dump_file,
4110 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4112 return true;
4115 if (!base_aligned_p)
4117 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4119 if (vect_debug_details (NULL))
4121 fprintf (dump_file, "can't force alignment of ref: ");
4122 print_generic_expr (dump_file, ref, TDF_SLIM);
4124 return true;
4127 /* Force the alignment of the decl.
4128 NOTE: This is the only change to the code we make during
4129 the analysis phase, before deciding to vectorize the loop. */
4130 if (vect_debug_details (NULL))
4131 fprintf (dump_file, "force alignment");
4132 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4133 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4136 /* At this point we assume that the base is aligned, and the offset from it
4137 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4138 gcc_assert (base_aligned_p
4139 || (TREE_CODE (base) == VAR_DECL
4140 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4142 /* Convert into bytes. */
4143 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4144 /* Check that there is no remainder in bits. */
4145 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4146 if (!integer_zerop (bit_offset))
4148 if (vect_debug_details (NULL))
4150 fprintf (dump_file, "bit offset alignment: ");
4151 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4153 return false;
4156 /* Alignment required, in bytes: */
4157 alignment = fold_convert (unsigned_type_node,
4158 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4160 /* Modulo alignment. */
4161 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4162 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4164 if (vect_debug_details (NULL))
4165 fprintf (dump_file, "unexpected misalign value");
4166 return false;
4169 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4171 if (vect_debug_details (NULL))
4172 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4174 return true;
4178 /* Function vect_compute_array_ref_alignment
4180 Compute the alignment of an array-ref.
4181 The alignment we compute here is relative to
4182 TYPE_ALIGN(VECTYPE) boundary.
4184 Output:
4185 OFFSET - the alignment in bits
4186 Return value - the base of the array-ref. E.g,
4187 if the array-ref is a.b[k].c[i][j] the returned
4188 base is a.b[k].c
4191 static tree
4192 vect_compute_array_ref_alignment (struct data_reference *dr,
4193 loop_vec_info loop_vinfo,
4194 tree vectype,
4195 tree *offset)
4197 tree array_first_index = size_zero_node;
4198 tree init;
4199 tree ref = DR_REF (dr);
4200 tree scalar_type = TREE_TYPE (ref);
4201 tree oprnd0 = TREE_OPERAND (ref, 0);
4202 tree dims = size_one_node;
4203 tree misalign = size_zero_node;
4204 tree next_ref, this_offset = size_zero_node;
4205 tree nunits;
4206 tree nbits;
4208 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4209 /* The reference is an array without its last index. */
4210 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4211 &misalign);
4212 else
4213 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4214 &misalign);
4215 if (!vectype)
4216 /* Alignment is not requested. Just return the base. */
4217 return next_ref;
4219 /* Compute alignment. */
4220 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4221 return NULL_TREE;
4222 this_offset = misalign;
4224 /* Check the first index accessed. */
4225 if (!vect_get_first_index (ref, &array_first_index))
4227 if (vect_debug_details (NULL))
4228 fprintf (dump_file, "no first_index for array.");
4229 return NULL_TREE;
4232 /* Check the index of the array_ref. */
4233 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4234 LOOP_VINFO_LOOP (loop_vinfo)->num);
4236 /* FORNOW: In order to simplify the handling of alignment, we make sure
4237 that the first location at which the array is accessed ('init') is on an
4238 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4239 This is too conservative, since we require that
4240 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4241 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4242 This should be relaxed in the future. */
4244 if (!init || !host_integerp (init, 0))
4246 if (vect_debug_details (NULL))
4247 fprintf (dump_file, "non constant init. ");
4248 return NULL_TREE;
4251 /* bytes per scalar element: */
4252 nunits = fold_convert (unsigned_type_node,
4253 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4254 nbits = int_const_binop (MULT_EXPR, nunits,
4255 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4257 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4258 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4259 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4260 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4262 /* TODO: allow negative misalign values. */
4263 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4265 if (vect_debug_details (NULL))
4266 fprintf (dump_file, "unexpected misalign value");
4267 return NULL_TREE;
4269 *offset = misalign;
4270 return next_ref;
4274 /* Function vect_compute_data_refs_alignment
4276 Compute the misalignment of data references in the loop.
4277 This pass may take place at function granularity instead of at loop
4278 granularity.
4280 FOR NOW: No analysis is actually performed. Misalignment is calculated
4281 only for trivial cases. TODO. */
4283 static bool
4284 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4286 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4287 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4288 unsigned int i;
4290 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4292 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4293 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4294 return false;
4297 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4299 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4300 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4301 return false;
4304 return true;
4308 /* Function vect_enhance_data_refs_alignment
4310 This pass will use loop versioning and loop peeling in order to enhance
4311 the alignment of data references in the loop.
4313 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4314 original loop is to be vectorized; Any other loops that are created by
4315 the transformations performed in this pass - are not supposed to be
4316 vectorized. This restriction will be relaxed. */
4318 static void
4319 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4321 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4322 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4323 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4324 unsigned int i;
4327 This pass will require a cost model to guide it whether to apply peeling
4328 or versioning or a combination of the two. For example, the scheme that
4329 intel uses when given a loop with several memory accesses, is as follows:
4330 choose one memory access ('p') which alignment you want to force by doing
4331 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4332 other accesses are not necessarily aligned, or (2) use loop versioning to
4333 generate one loop in which all accesses are aligned, and another loop in
4334 which only 'p' is necessarily aligned.
4336 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4337 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4338 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4340 Devising a cost model is the most critical aspect of this work. It will
4341 guide us on which access to peel for, whether to use loop versioning, how
4342 many versions to create, etc. The cost model will probably consist of
4343 generic considerations as well as target specific considerations (on
4344 powerpc for example, misaligned stores are more painful than misaligned
4345 loads).
4347 Here is the general steps involved in alignment enhancements:
4349 -- original loop, before alignment analysis:
4350 for (i=0; i<N; i++){
4351 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4352 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4355 -- After vect_compute_data_refs_alignment:
4356 for (i=0; i<N; i++){
4357 x = q[i]; # DR_MISALIGNMENT(q) = 3
4358 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4361 -- Possibility 1: we do loop versioning:
4362 if (p is aligned) {
4363 for (i=0; i<N; i++){ # loop 1A
4364 x = q[i]; # DR_MISALIGNMENT(q) = 3
4365 p[i] = y; # DR_MISALIGNMENT(p) = 0
4368 else {
4369 for (i=0; i<N; i++){ # loop 1B
4370 x = q[i]; # DR_MISALIGNMENT(q) = 3
4371 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4375 -- Possibility 2: we do loop peeling:
4376 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4377 x = q[i];
4378 p[i] = y;
4380 for (i = 3; i < N; i++){ # loop 2A
4381 x = q[i]; # DR_MISALIGNMENT(q) = 0
4382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4385 -- Possibility 3: combination of loop peeling and versioning:
4386 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4387 x = q[i];
4388 p[i] = y;
4390 if (p is aligned) {
4391 for (i = 3; i<N; i++){ # loop 3A
4392 x = q[i]; # DR_MISALIGNMENT(q) = 0
4393 p[i] = y; # DR_MISALIGNMENT(p) = 0
4396 else {
4397 for (i = 3; i<N; i++){ # loop 3B
4398 x = q[i]; # DR_MISALIGNMENT(q) = 0
4399 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4403 These loops are later passed to loop_transform to be vectorized. The
4404 vectorizer will use the alignment information to guide the transformation
4405 (whether to generate regular loads/stores, or with special handling for
4406 misalignment).
4409 /* (1) Peeling to force alignment. */
4411 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4412 Considerations:
4413 + How many accesses will become aligned due to the peeling
4414 - How many accesses will become unaligned due to the peeling,
4415 and the cost of misaligned accesses.
4416 - The cost of peeling (the extra runtime checks, the increase
4417 in code size).
4419 The scheme we use FORNOW: peel to force the alignment of the first
4420 misaligned store in the loop.
4421 Rationale: misaligned stores are not yet supported.
4423 TODO: Use a better cost model. */
4425 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4427 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4428 if (!aligned_access_p (dr))
4430 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4431 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4432 break;
4436 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4438 if (vect_debug_details (loop))
4439 fprintf (dump_file, "Peeling for alignment will not be applied.");
4440 return;
4442 else
4443 if (vect_debug_details (loop))
4444 fprintf (dump_file, "Peeling for alignment will be applied.");
4447 /* (1.2) Update the alignment info according to the peeling factor.
4448 If the misalignment of the DR we peel for is M, then the
4449 peeling factor is VF - M, and the misalignment of each access DR_i
4450 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4451 If the misalignment of the DR we peel for is unknown, then the
4452 misalignment of each access DR_i in the loop is also unknown.
4454 FORNOW: set the misalignment of the accesses to unknown even
4455 if the peeling factor is known at compile time.
4457 TODO: - if the peeling factor is known at compile time, use that
4458 when updating the misalignment info of the loop DRs.
4459 - consider accesses that are known to have the same
4460 alignment, even if that alignment is unknown. */
4462 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4464 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4465 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4466 DR_MISALIGNMENT (dr) = 0;
4467 else
4468 DR_MISALIGNMENT (dr) = -1;
4470 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4472 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4473 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4474 DR_MISALIGNMENT (dr) = 0;
4475 else
4476 DR_MISALIGNMENT (dr) = -1;
4481 /* Function vect_analyze_data_refs_alignment
4483 Analyze the alignment of the data-references in the loop.
4484 FOR NOW: Until support for misliagned accesses is in place, only if all
4485 accesses are aligned can the loop be vectorized. This restriction will be
4486 relaxed. */
4488 static bool
4489 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4491 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4492 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4493 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4494 enum dr_alignment_support supportable_dr_alignment;
4495 unsigned int i;
4497 if (vect_debug_details (NULL))
4498 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4501 /* This pass may take place at function granularity instead of at loop
4502 granularity. */
4504 if (!vect_compute_data_refs_alignment (loop_vinfo))
4506 if (vect_debug_details (loop) || vect_debug_stats (loop))
4507 fprintf (dump_file,
4508 "not vectorized: can't calculate alignment for data ref.");
4509 return false;
4513 /* This pass will decide on using loop versioning and/or loop peeling in
4514 order to enhance the alignment of data references in the loop. */
4516 vect_enhance_data_refs_alignment (loop_vinfo);
4519 /* Finally, check that all the data references in the loop can be
4520 handled with respect to their alignment. */
4522 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4524 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4525 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4526 if (!supportable_dr_alignment)
4528 if (vect_debug_details (loop) || vect_debug_stats (loop))
4529 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4530 return false;
4533 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4535 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4536 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4537 if (!supportable_dr_alignment)
4539 if (vect_debug_details (loop) || vect_debug_stats (loop))
4540 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4541 return false;
4545 return true;
4549 /* Function vect_analyze_data_ref_access.
4551 Analyze the access pattern of the data-reference DR. For now, a data access
4552 has to consecutive and aligned to be considered vectorizable. */
4554 static bool
4555 vect_analyze_data_ref_access (struct data_reference *dr)
4557 varray_type access_fns = DR_ACCESS_FNS (dr);
4558 tree access_fn;
4559 tree init, step;
4560 unsigned int dimensions, i;
4562 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4563 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4564 access is contiguous). */
4565 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4567 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4569 access_fn = DR_ACCESS_FN (dr, i);
4571 if (evolution_part_in_loop_num (access_fn,
4572 loop_containing_stmt (DR_STMT (dr))->num))
4574 /* Evolution part is not NULL in this loop (it is neither constant
4575 nor invariant). */
4576 if (vect_debug_details (NULL))
4578 fprintf (dump_file,
4579 "not vectorized: complicated multidim. array access.");
4580 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4582 return false;
4586 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4587 if (!evolution_function_is_constant_p (access_fn)
4588 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4589 access_fn, &init, &step, true))
4591 if (vect_debug_details (NULL))
4593 fprintf (dump_file, "not vectorized: complicated access function.");
4594 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4596 return false;
4599 return true;
4603 /* Function vect_analyze_data_ref_accesses.
4605 Analyze the access pattern of all the data references in the loop.
4607 FORNOW: the only access pattern that is considered vectorizable is a
4608 simple step 1 (consecutive) access.
4610 FORNOW: handle only arrays and pointer accesses. */
4612 static bool
4613 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4615 unsigned int i;
4616 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4617 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4619 if (vect_debug_details (NULL))
4620 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4622 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4624 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4625 bool ok = vect_analyze_data_ref_access (dr);
4626 if (!ok)
4628 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4629 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4630 fprintf (dump_file, "not vectorized: complicated access pattern.");
4631 return false;
4635 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4637 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4638 bool ok = vect_analyze_data_ref_access (dr);
4639 if (!ok)
4641 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4642 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4643 fprintf (dump_file, "not vectorized: complicated access pattern.");
4644 return false;
4648 return true;
4652 /* Function vect_analyze_pointer_ref_access.
4654 Input:
4655 STMT - a stmt that contains a data-ref
4656 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4658 If the data-ref access is vectorizable, return a data_reference structure
4659 that represents it (DR). Otherwise - return NULL. */
4661 static struct data_reference *
4662 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4665 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4666 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4667 tree init, step;
4668 int step_val;
4669 tree reftype, innertype;
4670 enum machine_mode innermode;
4671 tree indx_access_fn;
4672 int loopnum = loop->num;
4673 struct data_reference *dr;
4675 if (!access_fn)
4677 if (vect_debug_stats (loop) || vect_debug_details (loop))
4678 fprintf (dump_file, "not vectorized: complicated pointer access.");
4679 return NULL;
4682 if (vect_debug_details (NULL))
4684 fprintf (dump_file, "Access function of ptr: ");
4685 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4688 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4690 if (vect_debug_stats (loop) || vect_debug_details (loop))
4691 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4692 return NULL;
4695 STRIP_NOPS (init);
4697 if (!host_integerp (step,0))
4699 if (vect_debug_stats (loop) || vect_debug_details (loop))
4700 fprintf (dump_file,
4701 "not vectorized: non constant step for pointer access.");
4702 return NULL;
4705 step_val = TREE_INT_CST_LOW (step);
4707 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4708 if (TREE_CODE (reftype) != POINTER_TYPE)
4710 if (vect_debug_stats (loop) || vect_debug_details (loop))
4711 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4712 return NULL;
4715 reftype = TREE_TYPE (init);
4716 if (TREE_CODE (reftype) != POINTER_TYPE)
4718 if (vect_debug_stats (loop) || vect_debug_details (loop))
4719 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4720 return NULL;
4723 innertype = TREE_TYPE (reftype);
4724 innermode = TYPE_MODE (innertype);
4725 if (GET_MODE_SIZE (innermode) != step_val)
4727 /* FORNOW: support only consecutive access */
4728 if (vect_debug_stats (loop) || vect_debug_details (loop))
4729 fprintf (dump_file, "not vectorized: non consecutive access.");
4730 return NULL;
4733 indx_access_fn =
4734 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4735 if (vect_debug_details (NULL))
4737 fprintf (dump_file, "Access function of ptr indx: ");
4738 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4740 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4741 return dr;
4745 /* Function vect_get_symbl_and_dr.
4747 The function returns SYMBL - the relevant variable for
4748 memory tag (for aliasing purposes).
4749 Also data reference structure DR is created.
4751 Input:
4752 MEMREF - data reference in STMT
4753 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4755 Output:
4756 DR - data_reference struct for MEMREF
4757 return value - the relevant variable for memory tag (for aliasing purposes).
4761 static tree
4762 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4763 loop_vec_info loop_vinfo, struct data_reference **dr)
4765 tree symbl, oprnd0, oprnd1;
4766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4767 tree offset;
4768 tree array_base, base;
4769 struct data_reference *new_dr;
4770 bool base_aligned_p;
4772 *dr = NULL;
4773 switch (TREE_CODE (memref))
4775 case INDIRECT_REF:
4776 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4777 if (! new_dr)
4778 return NULL_TREE;
4779 *dr = new_dr;
4780 symbl = DR_BASE_NAME (new_dr);
4781 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4783 switch (TREE_CODE (symbl))
4785 case PLUS_EXPR:
4786 case MINUS_EXPR:
4787 oprnd0 = TREE_OPERAND (symbl, 0);
4788 oprnd1 = TREE_OPERAND (symbl, 1);
4790 STRIP_NOPS(oprnd1);
4791 /* Only {address_base + offset} expressions are supported,
4792 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4793 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4794 TODO: swap operands if {offset + address_base}. */
4795 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4796 && TREE_CODE (oprnd1) != INTEGER_CST)
4797 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4798 return NULL_TREE;
4800 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4801 symbl = oprnd0;
4802 else
4803 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4804 loop_vinfo, &new_dr);
4806 case SSA_NAME:
4807 case ADDR_EXPR:
4808 /* symbl remains unchanged. */
4809 break;
4811 default:
4812 if (vect_debug_details (NULL))
4814 fprintf (dump_file, "unhandled data ref: ");
4815 print_generic_expr (dump_file, memref, TDF_SLIM);
4816 fprintf (dump_file, " (symbl ");
4817 print_generic_expr (dump_file, symbl, TDF_SLIM);
4818 fprintf (dump_file, ") in stmt ");
4819 print_generic_expr (dump_file, stmt, TDF_SLIM);
4821 return NULL_TREE;
4823 break;
4825 case ARRAY_REF:
4826 offset = size_zero_node;
4828 /* Store the array base in the stmt info.
4829 For one dimensional array ref a[i], the base is a,
4830 for multidimensional a[i1][i2]..[iN], the base is
4831 a[i1][i2]..[iN-1]. */
4832 array_base = TREE_OPERAND (memref, 0);
4833 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4835 new_dr = analyze_array (stmt, memref, is_read);
4836 *dr = new_dr;
4838 /* Find the relevant symbol for aliasing purposes. */
4839 base = DR_BASE_NAME (new_dr);
4840 switch (TREE_CODE (base))
4842 case VAR_DECL:
4843 symbl = base;
4844 break;
4846 case INDIRECT_REF:
4847 symbl = TREE_OPERAND (base, 0);
4848 break;
4850 case COMPONENT_REF:
4851 /* Could have recorded more accurate information -
4852 i.e, the actual FIELD_DECL that is being referenced -
4853 but later passes expect VAR_DECL as the nmt. */
4854 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4855 loop_vinfo, &offset, &base_aligned_p);
4856 if (symbl)
4857 break;
4858 /* fall through */
4859 default:
4860 if (vect_debug_details (NULL))
4862 fprintf (dump_file, "unhandled struct/class field access ");
4863 print_generic_expr (dump_file, stmt, TDF_SLIM);
4865 return NULL_TREE;
4867 break;
4869 default:
4870 if (vect_debug_details (NULL))
4872 fprintf (dump_file, "unhandled data ref: ");
4873 print_generic_expr (dump_file, memref, TDF_SLIM);
4874 fprintf (dump_file, " in stmt ");
4875 print_generic_expr (dump_file, stmt, TDF_SLIM);
4877 return NULL_TREE;
4879 return symbl;
4883 /* Function vect_analyze_data_refs.
4885 Find all the data references in the loop.
4887 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4888 which base is really an array (not a pointer) and which alignment
4889 can be forced. This restriction will be relaxed. */
4891 static bool
4892 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4894 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4895 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4896 int nbbs = loop->num_nodes;
4897 block_stmt_iterator si;
4898 int j;
4899 struct data_reference *dr;
4900 tree tag;
4901 tree address_base;
4902 bool base_aligned_p;
4903 tree offset;
4905 if (vect_debug_details (NULL))
4906 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4908 for (j = 0; j < nbbs; j++)
4910 basic_block bb = bbs[j];
4911 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4913 bool is_read = false;
4914 tree stmt = bsi_stmt (si);
4915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4916 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4917 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4918 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4919 varray_type *datarefs = NULL;
4920 int nvuses, nv_may_defs, nv_must_defs;
4921 tree memref = NULL;
4922 tree symbl;
4924 /* Assumption: there exists a data-ref in stmt, if and only if
4925 it has vuses/vdefs. */
4927 if (!vuses && !v_may_defs && !v_must_defs)
4928 continue;
4930 nvuses = NUM_VUSES (vuses);
4931 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4932 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4934 if (nvuses && (nv_may_defs || nv_must_defs))
4936 if (vect_debug_details (NULL))
4938 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4939 print_generic_expr (dump_file, stmt, TDF_SLIM);
4941 return false;
4944 if (TREE_CODE (stmt) != MODIFY_EXPR)
4946 if (vect_debug_details (NULL))
4948 fprintf (dump_file, "unexpected vops in stmt: ");
4949 print_generic_expr (dump_file, stmt, TDF_SLIM);
4951 return false;
4954 if (vuses)
4956 memref = TREE_OPERAND (stmt, 1);
4957 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4958 is_read = true;
4960 else /* vdefs */
4962 memref = TREE_OPERAND (stmt, 0);
4963 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4964 is_read = false;
4967 /* Analyze MEMREF. If it is of a supported form, build data_reference
4968 struct for it (DR) and find the relevant symbol for aliasing
4969 purposes. */
4970 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4971 &dr);
4972 if (!symbl)
4974 if (vect_debug_stats (loop) || vect_debug_details (loop))
4976 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4977 print_generic_expr (dump_file, stmt, TDF_SLIM);
4979 return false;
4982 /* Find and record the memtag assigned to this data-ref. */
4983 switch (TREE_CODE (symbl))
4985 case VAR_DECL:
4986 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4987 break;
4989 case SSA_NAME:
4990 symbl = SSA_NAME_VAR (symbl);
4991 tag = get_var_ann (symbl)->type_mem_tag;
4992 if (!tag)
4994 tree ptr = TREE_OPERAND (memref, 0);
4995 if (TREE_CODE (ptr) == SSA_NAME)
4996 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4998 if (!tag)
5000 if (vect_debug_stats (loop) || vect_debug_details (loop))
5001 fprintf (dump_file, "not vectorized: no memtag for ref.");
5002 return false;
5004 STMT_VINFO_MEMTAG (stmt_info) = tag;
5005 break;
5007 case ADDR_EXPR:
5008 address_base = TREE_OPERAND (symbl, 0);
5010 switch (TREE_CODE (address_base))
5012 case ARRAY_REF:
5013 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5014 DR_IS_READ(dr));
5015 tag = vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr),
5016 NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5017 if (!tag)
5019 if (vect_debug_stats (loop) || vect_debug_details (loop))
5020 fprintf (dump_file, "not vectorized: no memtag for ref.");
5021 return false;
5023 STMT_VINFO_MEMTAG (stmt_info) = tag;
5024 break;
5026 case VAR_DECL:
5027 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5028 break;
5030 default:
5031 if (vect_debug_stats (loop) || vect_debug_details (loop))
5033 fprintf (dump_file,
5034 "not vectorized: unhandled address expr: ");
5035 print_generic_expr (dump_file, stmt, TDF_SLIM);
5037 return false;
5039 break;
5041 default:
5042 if (vect_debug_stats (loop) || vect_debug_details (loop))
5044 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5045 print_generic_expr (dump_file, memref, TDF_SLIM);
5047 return false;
5050 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5051 STMT_VINFO_DATA_REF (stmt_info) = dr;
5055 return true;
5059 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5061 /* Function vect_mark_relevant.
5063 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5065 static void
5066 vect_mark_relevant (varray_type worklist, tree stmt)
5068 stmt_vec_info stmt_info;
5070 if (vect_debug_details (NULL))
5071 fprintf (dump_file, "mark relevant.");
5073 if (TREE_CODE (stmt) == PHI_NODE)
5075 VARRAY_PUSH_TREE (worklist, stmt);
5076 return;
5079 stmt_info = vinfo_for_stmt (stmt);
5081 if (!stmt_info)
5083 if (vect_debug_details (NULL))
5085 fprintf (dump_file, "mark relevant: no stmt info!!.");
5086 print_generic_expr (dump_file, stmt, TDF_SLIM);
5088 return;
5091 if (STMT_VINFO_RELEVANT_P (stmt_info))
5093 if (vect_debug_details (NULL))
5094 fprintf (dump_file, "already marked relevant.");
5095 return;
5098 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5099 VARRAY_PUSH_TREE (worklist, stmt);
5103 /* Function vect_stmt_relevant_p.
5105 Return true if STMT in loop that is represented by LOOP_VINFO is
5106 "relevant for vectorization".
5108 A stmt is considered "relevant for vectorization" if:
5109 - it has uses outside the loop.
5110 - it has vdefs (it alters memory).
5111 - control stmts in the loop (except for the exit condition).
5113 CHECKME: what other side effects would the vectorizer allow? */
5115 static bool
5116 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5118 v_may_def_optype v_may_defs;
5119 v_must_def_optype v_must_defs;
5120 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5121 int i;
5122 dataflow_t df;
5123 int num_uses;
5125 /* cond stmt other than loop exit cond. */
5126 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5127 return true;
5129 /* changing memory. */
5130 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5131 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5132 if (v_may_defs || v_must_defs)
5134 if (vect_debug_details (NULL))
5135 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5136 return true;
5139 /* uses outside the loop. */
5140 df = get_immediate_uses (stmt);
5141 num_uses = num_immediate_uses (df);
5142 for (i = 0; i < num_uses; i++)
5144 tree use = immediate_use (df, i);
5145 basic_block bb = bb_for_stmt (use);
5146 if (!flow_bb_inside_loop_p (loop, bb))
5148 if (vect_debug_details (NULL))
5149 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5150 return true;
5154 return false;
5158 /* Function vect_mark_stmts_to_be_vectorized.
5160 Not all stmts in the loop need to be vectorized. For example:
5162 for i...
5163 for j...
5164 1. T0 = i + j
5165 2. T1 = a[T0]
5167 3. j = j + 1
5169 Stmt 1 and 3 do not need to be vectorized, because loop control and
5170 addressing of vectorized data-refs are handled differently.
5172 This pass detects such stmts. */
5174 static bool
5175 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5177 varray_type worklist;
5178 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5179 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5180 unsigned int nbbs = loop->num_nodes;
5181 block_stmt_iterator si;
5182 tree stmt;
5183 stmt_ann_t ann;
5184 unsigned int i;
5185 int j;
5186 use_optype use_ops;
5187 stmt_vec_info stmt_info;
5189 if (vect_debug_details (NULL))
5190 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5192 VARRAY_TREE_INIT (worklist, 64, "work list");
5194 /* 1. Init worklist. */
5196 for (i = 0; i < nbbs; i++)
5198 basic_block bb = bbs[i];
5199 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5201 stmt = bsi_stmt (si);
5203 if (vect_debug_details (NULL))
5205 fprintf (dump_file, "init: stmt relevant? ");
5206 print_generic_expr (dump_file, stmt, TDF_SLIM);
5209 stmt_info = vinfo_for_stmt (stmt);
5210 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5212 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5213 vect_mark_relevant (worklist, stmt);
5218 /* 2. Process_worklist */
5220 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5222 stmt = VARRAY_TOP_TREE (worklist);
5223 VARRAY_POP (worklist);
5225 if (vect_debug_details (NULL))
5227 fprintf (dump_file, "worklist: examine stmt: ");
5228 print_generic_expr (dump_file, stmt, TDF_SLIM);
5231 /* Examine the USES in this statement. Mark all the statements which
5232 feed this statement's uses as "relevant", unless the USE is used as
5233 an array index. */
5235 if (TREE_CODE (stmt) == PHI_NODE)
5237 /* follow the def-use chain inside the loop. */
5238 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5240 tree arg = PHI_ARG_DEF (stmt, j);
5241 tree def_stmt = NULL_TREE;
5242 basic_block bb;
5243 if (!vect_is_simple_use (arg, loop, &def_stmt))
5245 if (vect_debug_details (NULL))
5246 fprintf (dump_file, "worklist: unsupported use.");
5247 varray_clear (worklist);
5248 return false;
5250 if (!def_stmt)
5251 continue;
5253 if (vect_debug_details (NULL))
5255 fprintf (dump_file, "worklist: def_stmt: ");
5256 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5259 bb = bb_for_stmt (def_stmt);
5260 if (flow_bb_inside_loop_p (loop, bb))
5261 vect_mark_relevant (worklist, def_stmt);
5265 ann = stmt_ann (stmt);
5266 use_ops = USE_OPS (ann);
5268 for (i = 0; i < NUM_USES (use_ops); i++)
5270 tree use = USE_OP (use_ops, i);
5272 /* We are only interested in uses that need to be vectorized. Uses
5273 that are used for address computation are not considered relevant.
5275 if (exist_non_indexing_operands_for_use_p (use, stmt))
5277 tree def_stmt = NULL_TREE;
5278 basic_block bb;
5279 if (!vect_is_simple_use (use, loop, &def_stmt))
5281 if (vect_debug_details (NULL))
5282 fprintf (dump_file, "worklist: unsupported use.");
5283 varray_clear (worklist);
5284 return false;
5287 if (!def_stmt)
5288 continue;
5290 if (vect_debug_details (NULL))
5292 fprintf (dump_file, "worklist: examine use %d: ", i);
5293 print_generic_expr (dump_file, use, TDF_SLIM);
5296 bb = bb_for_stmt (def_stmt);
5297 if (flow_bb_inside_loop_p (loop, bb))
5298 vect_mark_relevant (worklist, def_stmt);
5301 } /* while worklist */
5303 varray_clear (worklist);
5304 return true;
5308 /* Function vect_can_advance_ivs_p
5310 In case the number of iterations that LOOP iterates in unknown at compile
5311 time, an epilog loop will be generated, and the loop induction variables
5312 (IVs) will be "advanced" to the value they are supposed to take just before
5313 the epilog loop. Here we check that the access function of the loop IVs
5314 and the expression that represents the loop bound are simple enough.
5315 These restrictions will be relaxed in the future. */
5317 static bool
5318 vect_can_advance_ivs_p (struct loop *loop)
5320 basic_block bb = loop->header;
5321 tree phi;
5323 /* Analyze phi functions of the loop header. */
5325 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5327 tree access_fn = NULL;
5328 tree evolution_part;
5330 if (vect_debug_details (NULL))
5332 fprintf (dump_file, "Analyze phi: ");
5333 print_generic_expr (dump_file, phi, TDF_SLIM);
5336 /* Skip virtual phi's. The data dependences that are associated with
5337 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5339 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5341 if (vect_debug_details (NULL))
5342 fprintf (dump_file, "virtual phi. skip.");
5343 continue;
5346 /* Analyze the evolution function. */
5348 access_fn = instantiate_parameters
5349 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5351 if (!access_fn)
5353 if (vect_debug_details (NULL))
5354 fprintf (dump_file, "No Access function.");
5355 return false;
5358 if (vect_debug_details (NULL))
5360 fprintf (dump_file, "Access function of PHI: ");
5361 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5364 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5366 if (evolution_part == NULL_TREE)
5367 return false;
5369 /* FORNOW: We do not transform initial conditions of IVs
5370 which evolution functions are a polynomial of degree >= 2. */
5372 if (tree_is_chrec (evolution_part))
5373 return false;
5376 return true;
5380 /* Function vect_get_loop_niters.
5382 Determine how many iterations the loop is executed.
5383 If an expression that represents the number of iterations
5384 can be constructed, place it in NUMBER_OF_ITERATIONS.
5385 Return the loop exit condition. */
5387 static tree
5388 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5390 tree niters;
5392 if (vect_debug_details (NULL))
5393 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5395 niters = number_of_iterations_in_loop (loop);
5397 if (niters != NULL_TREE
5398 && niters != chrec_dont_know)
5400 *number_of_iterations = niters;
5402 if (vect_debug_details (NULL))
5404 fprintf (dump_file, "==> get_loop_niters:" );
5405 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5409 return get_loop_exit_condition (loop);
5413 /* Function vect_analyze_loop_form.
5415 Verify the following restrictions (some may be relaxed in the future):
5416 - it's an inner-most loop
5417 - number of BBs = 2 (which are the loop header and the latch)
5418 - the loop has a pre-header
5419 - the loop has a single entry and exit
5420 - the loop exit condition is simple enough, and the number of iterations
5421 can be analyzed (a countable loop). */
5423 static loop_vec_info
5424 vect_analyze_loop_form (struct loop *loop)
5426 loop_vec_info loop_vinfo;
5427 tree loop_cond;
5428 tree number_of_iterations = NULL;
5429 bool rescan = false;
5431 if (vect_debug_details (loop))
5432 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5434 if (loop->inner
5435 || !loop->single_exit
5436 || loop->num_nodes != 2
5437 || EDGE_COUNT (loop->header->preds) != 2
5438 || loop->num_entries != 1)
5440 if (vect_debug_stats (loop) || vect_debug_details (loop))
5442 fprintf (dump_file, "not vectorized: bad loop form. ");
5443 if (loop->inner)
5444 fprintf (dump_file, "nested loop.");
5445 else if (!loop->single_exit)
5446 fprintf (dump_file, "multiple exits.");
5447 else if (loop->num_nodes != 2)
5448 fprintf (dump_file, "too many BBs in loop.");
5449 else if (EDGE_COUNT (loop->header->preds) != 2)
5450 fprintf (dump_file, "too many incoming edges.");
5451 else if (loop->num_entries != 1)
5452 fprintf (dump_file, "too many entries.");
5455 return NULL;
5458 /* We assume that the loop exit condition is at the end of the loop. i.e,
5459 that the loop is represented as a do-while (with a proper if-guard
5460 before the loop if needed), where the loop header contains all the
5461 executable statements, and the latch is empty. */
5462 if (!empty_block_p (loop->latch))
5464 if (vect_debug_stats (loop) || vect_debug_details (loop))
5465 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5466 return NULL;
5469 /* Make sure we have a preheader basic block. */
5470 if (!loop->pre_header)
5472 rescan = true;
5473 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5476 /* Make sure there exists a single-predecessor exit bb: */
5477 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5479 rescan = true;
5480 loop_split_edge_with (loop->exit_edges[0], NULL);
5483 if (rescan)
5485 flow_loop_scan (loop, LOOP_ALL);
5486 /* Flow loop scan does not update loop->single_exit field. */
5487 loop->single_exit = loop->exit_edges[0];
5490 if (empty_block_p (loop->header))
5492 if (vect_debug_stats (loop) || vect_debug_details (loop))
5493 fprintf (dump_file, "not vectorized: empty loop.");
5494 return NULL;
5497 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5498 if (!loop_cond)
5500 if (vect_debug_stats (loop) || vect_debug_details (loop))
5501 fprintf (dump_file, "not vectorized: complicated exit condition.");
5502 return NULL;
5505 if (!number_of_iterations)
5507 if (vect_debug_stats (loop) || vect_debug_details (loop))
5508 fprintf (dump_file,
5509 "not vectorized: number of iterations cannot be computed.");
5510 return NULL;
5513 if (chrec_contains_undetermined (number_of_iterations))
5515 if (vect_debug_details (NULL))
5516 fprintf (dump_file, "Infinite number of iterations.");
5517 return false;
5520 loop_vinfo = new_loop_vec_info (loop);
5521 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5523 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5525 if (vect_debug_details (loop))
5527 fprintf (dump_file, "loop bound unknown.\n");
5528 fprintf (dump_file, "Symbolic number of iterations is ");
5529 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5532 else
5533 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5535 if (vect_debug_stats (loop) || vect_debug_details (loop))
5536 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5537 return NULL;
5540 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5542 return loop_vinfo;
5546 /* Function vect_analyze_loop.
5548 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5549 for it. The different analyses will record information in the
5550 loop_vec_info struct. */
5552 static loop_vec_info
5553 vect_analyze_loop (struct loop *loop)
5555 bool ok;
5556 loop_vec_info loop_vinfo;
5558 if (vect_debug_details (NULL))
5559 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5561 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5563 loop_vinfo = vect_analyze_loop_form (loop);
5564 if (!loop_vinfo)
5566 if (vect_debug_details (loop))
5567 fprintf (dump_file, "bad loop form.");
5568 return NULL;
5571 /* Find all data references in the loop (which correspond to vdefs/vuses)
5572 and analyze their evolution in the loop.
5574 FORNOW: Handle only simple, array references, which
5575 alignment can be forced, and aligned pointer-references. */
5577 ok = vect_analyze_data_refs (loop_vinfo);
5578 if (!ok)
5580 if (vect_debug_details (loop))
5581 fprintf (dump_file, "bad data references.");
5582 destroy_loop_vec_info (loop_vinfo);
5583 return NULL;
5586 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5588 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5589 if (!ok)
5591 if (vect_debug_details (loop))
5592 fprintf (dump_file, "unexpected pattern.");
5593 if (vect_debug_details (loop))
5594 fprintf (dump_file, "not vectorized: unexpected pattern.");
5595 destroy_loop_vec_info (loop_vinfo);
5596 return NULL;
5599 /* Check that all cross-iteration scalar data-flow cycles are OK.
5600 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5602 ok = vect_analyze_scalar_cycles (loop_vinfo);
5603 if (!ok)
5605 if (vect_debug_details (loop))
5606 fprintf (dump_file, "bad scalar cycle.");
5607 destroy_loop_vec_info (loop_vinfo);
5608 return NULL;
5611 /* Analyze data dependences between the data-refs in the loop.
5612 FORNOW: fail at the first data dependence that we encounter. */
5614 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5615 if (!ok)
5617 if (vect_debug_details (loop))
5618 fprintf (dump_file, "bad data dependence.");
5619 destroy_loop_vec_info (loop_vinfo);
5620 return NULL;
5623 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5624 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5626 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5627 if (!ok)
5629 if (vect_debug_details (loop))
5630 fprintf (dump_file, "bad data access.");
5631 destroy_loop_vec_info (loop_vinfo);
5632 return NULL;
5635 /* Analyze the alignment of the data-refs in the loop.
5636 FORNOW: Only aligned accesses are handled. */
5638 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5639 if (!ok)
5641 if (vect_debug_details (loop))
5642 fprintf (dump_file, "bad data alignment.");
5643 destroy_loop_vec_info (loop_vinfo);
5644 return NULL;
5647 /* Scan all the operations in the loop and make sure they are
5648 vectorizable. */
5650 ok = vect_analyze_operations (loop_vinfo);
5651 if (!ok)
5653 if (vect_debug_details (loop))
5654 fprintf (dump_file, "bad operation or unsupported loop bound.");
5655 destroy_loop_vec_info (loop_vinfo);
5656 return NULL;
5659 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5661 return loop_vinfo;
5665 /* Function need_imm_uses_for.
5667 Return whether we ought to include information for 'var'
5668 when calculating immediate uses. For this pass we only want use
5669 information for non-virtual variables. */
5671 static bool
5672 need_imm_uses_for (tree var)
5674 return is_gimple_reg (var);
5678 /* Function vectorize_loops.
5680 Entry Point to loop vectorization phase. */
5682 void
5683 vectorize_loops (struct loops *loops)
5685 unsigned int i, loops_num;
5686 unsigned int num_vectorized_loops = 0;
5688 /* Does the target support SIMD? */
5689 /* FORNOW: until more sophisticated machine modelling is in place. */
5690 if (!UNITS_PER_SIMD_WORD)
5692 if (vect_debug_details (NULL))
5693 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5694 return;
5697 #ifdef ENABLE_CHECKING
5698 verify_loop_closed_ssa ();
5699 #endif
5701 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5703 /* ----------- Analyze loops. ----------- */
5705 /* If some loop was duplicated, it gets bigger number
5706 than all previously defined loops. This fact allows us to run
5707 only over initial loops skipping newly generated ones. */
5708 loops_num = loops->num;
5709 for (i = 1; i < loops_num; i++)
5711 loop_vec_info loop_vinfo;
5712 struct loop *loop = loops->parray[i];
5714 if (!loop)
5715 continue;
5717 loop_vinfo = vect_analyze_loop (loop);
5718 loop->aux = loop_vinfo;
5720 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5721 continue;
5723 vect_transform_loop (loop_vinfo, loops);
5724 num_vectorized_loops++;
5727 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5728 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5729 num_vectorized_loops);
5731 /* ----------- Finalize. ----------- */
5733 free_df ();
5734 for (i = 1; i < loops_num; i++)
5736 struct loop *loop = loops->parray[i];
5737 loop_vec_info loop_vinfo;
5739 if (!loop)
5740 continue;
5741 loop_vinfo = loop->aux;
5742 destroy_loop_vec_info (loop_vinfo);
5743 loop->aux = NULL;
5746 rewrite_into_ssa (false);
5747 rewrite_into_loop_closed_ssa (); /* FORNOW */
5748 bitmap_clear (vars_to_rename);